• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * UTF-8 and Unicode string manipulation, plus java/lang/String convenience
19  * functions.
20  *
21  * In most cases we populate the fields in the String object directly,
22  * rather than going through an instance field lookup.
23  */
24 #include "Dalvik.h"
25 #include <stdlib.h>
26 
27 /*
28  * Initialize string globals.
29  *
30  * This isn't part of the VM init sequence because it's hard to get the
31  * timing right -- we need it to happen after java/lang/String has been
32  * loaded, but before anybody wants to use a string.  It's easiest to
33  * just initialize it on first use.
34  *
35  * In some unusual circumstances (e.g. trying to throw an exception because
36  * String implements java/lang/CharSequence, but CharSequence doesn't exist)
37  * we can try to create an exception string internally before anything has
38  * really tried to use String.  In that case we basically self-destruct.
39  *
40  * We're expecting to be essentially single-threaded at this point.
41  * We employ atomics to ensure everything is observed correctly, and also
42  * to guarantee that we do detect a problem if our assumption is wrong.
43  */
stringStartup()44 static bool stringStartup()
45 {
46     if (gDvm.javaLangStringReady < 0) {
47         LOGE("ERROR: reentrant string initialization\n");
48         assert(false);
49         return false;
50     }
51 
52     if (android_atomic_acquire_cas(0, -1, &gDvm.javaLangStringReady) != 0) {
53         LOGE("ERROR: initial string-ready state not 0 (%d)\n",
54             gDvm.javaLangStringReady);
55         return false;
56     }
57 
58     if (gDvm.classJavaLangString == NULL)
59         gDvm.classJavaLangString =
60             dvmFindSystemClassNoInit("Ljava/lang/String;");
61 
62     gDvm.offJavaLangString_value =
63         dvmFindFieldOffset(gDvm.classJavaLangString, "value", "[C");
64     gDvm.offJavaLangString_count =
65         dvmFindFieldOffset(gDvm.classJavaLangString, "count", "I");
66     gDvm.offJavaLangString_offset =
67         dvmFindFieldOffset(gDvm.classJavaLangString, "offset", "I");
68     gDvm.offJavaLangString_hashCode =
69         dvmFindFieldOffset(gDvm.classJavaLangString, "hashCode", "I");
70 
71     if (gDvm.offJavaLangString_value < 0 ||
72         gDvm.offJavaLangString_count < 0 ||
73         gDvm.offJavaLangString_offset < 0 ||
74         gDvm.offJavaLangString_hashCode < 0)
75     {
76         LOGE("VM-required field missing from java/lang/String\n");
77         return false;
78     }
79 
80     bool badValue = false;
81     if (gDvm.offJavaLangString_value != STRING_FIELDOFF_VALUE) {
82         LOGE("InlineNative: String.value offset = %d, expected %d\n",
83             gDvm.offJavaLangString_value, STRING_FIELDOFF_VALUE);
84         badValue = true;
85     }
86     if (gDvm.offJavaLangString_count != STRING_FIELDOFF_COUNT) {
87         LOGE("InlineNative: String.count offset = %d, expected %d\n",
88             gDvm.offJavaLangString_count, STRING_FIELDOFF_COUNT);
89         badValue = true;
90     }
91     if (gDvm.offJavaLangString_offset != STRING_FIELDOFF_OFFSET) {
92         LOGE("InlineNative: String.offset offset = %d, expected %d\n",
93             gDvm.offJavaLangString_offset, STRING_FIELDOFF_OFFSET);
94         badValue = true;
95     }
96     if (gDvm.offJavaLangString_hashCode != STRING_FIELDOFF_HASHCODE) {
97         LOGE("InlineNative: String.hashCode offset = %d, expected %d\n",
98             gDvm.offJavaLangString_hashCode, STRING_FIELDOFF_HASHCODE);
99         badValue = true;
100     }
101     if (badValue)
102         return false;
103 
104     android_atomic_release_store(1, &gDvm.javaLangStringReady);
105 
106     return true;
107 }
108 
/*
 * Release heap-allocated storage owned by the string code.
 *
 * Nothing needs releasing at present, so this does nothing.
 */
void dvmStringShutdown()
{
    /* intentionally empty: currently unused */
}
116 
117 /*
118  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
119  *
120  * This may or may not yield the same results as the java/lang/String
121  * computeHashCode() function.  (To make sure this doesn't get abused,
122  * I'm initializing the hash code to 1 so they *don't* match up.)
123  *
124  * It would be more correct to invoke dexGetUtf16FromUtf8() here and compute
125  * the hash with the result.  That way, if something encoded the same
126  * character in two different ways, the hash value would be the same.  For
127  * our purposes that isn't necessary.
128  */
dvmComputeUtf8Hash(const char * utf8Str)129 u4 dvmComputeUtf8Hash(const char* utf8Str)
130 {
131     u4 hash = 1;
132 
133     while (*utf8Str != '\0')
134         hash = hash * 31 + *utf8Str++;
135 
136     return hash;
137 }
138 
/*
 * Like "strlen", but for strings encoded with "modified" UTF-8.
 *
 * The value returned is the number of encoded characters (one per
 * one-, two- or three-byte sequence), which may or may not be the same
 * as the number of bytes.
 *
 * The input is assumed to be well formed: continuation bytes are skipped
 * without being examined.
 *
 * (If this needs optimizing, try: mask against 0xa0, shift right 5,
 * get increment {1-3} from table of 8 values.)
 */
int dvmUtf8Len(const char* utf8Str)
{
    const char* p = utf8Str;
    int count = 0;

    for (;;) {
        int lead = *p++;

        if (lead == '\0')
            break;
        count++;

        if ((lead & 0x80) == 0)
            continue;       /* one-byte encoding */

        /* high bit set: two bytes, or three if bit 0x20 is also set */
        p += ((lead & 0x20) != 0) ? 2 : 1;
    }

    return count;
}
166 
167 /*
168  * Convert a "modified" UTF-8 string to UTF-16.
169  */
dvmConvertUtf8ToUtf16(u2 * utf16Str,const char * utf8Str)170 void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
171 {
172     while (*utf8Str != '\0')
173         *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str);
174 }
175 
176 /*
177  * Given a UTF-16 string, compute the length of the corresponding UTF-8
178  * string in bytes.
179  */
utf16_utf8ByteLen(const u2 * utf16Str,int len)180 static int utf16_utf8ByteLen(const u2* utf16Str, int len)
181 {
182     int utf8Len = 0;
183 
184     while (len--) {
185         unsigned int uic = *utf16Str++;
186 
187         /*
188          * The most common case is (uic > 0 && uic <= 0x7f).
189          */
190         if (uic == 0 || uic > 0x7f) {
191             if (uic > 0x07ff)
192                 utf8Len += 3;
193             else /*(uic > 0x7f || uic == 0) */
194                 utf8Len += 2;
195         } else
196             utf8Len++;
197     }
198     return utf8Len;
199 }
200 
201 /*
202  * Convert a UTF-16 string to UTF-8.
203  *
204  * Make sure you allocate "utf8Str" with the result of utf16_utf8ByteLen(),
205  * not just "len".
206  */
convertUtf16ToUtf8(char * utf8Str,const u2 * utf16Str,int len)207 static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len)
208 {
209     assert(len >= 0);
210 
211     while (len--) {
212         unsigned int uic = *utf16Str++;
213 
214         /*
215          * The most common case is (uic > 0 && uic <= 0x7f).
216          */
217         if (uic == 0 || uic > 0x7f) {
218             if (uic > 0x07ff) {
219                 *utf8Str++ = (uic >> 12) | 0xe0;
220                 *utf8Str++ = ((uic >> 6) & 0x3f) | 0x80;
221                 *utf8Str++ = (uic & 0x3f) | 0x80;
222             } else /*(uic > 0x7f || uic == 0)*/ {
223                 *utf8Str++ = (uic >> 6) | 0xc0;
224                 *utf8Str++ = (uic & 0x3f) | 0x80;
225             }
226         } else {
227             *utf8Str++ = uic;
228         }
229     }
230 
231     *utf8Str = '\0';
232 }
233 
234 /*
235  * Use the java/lang/String.computeHashCode() algorithm.
236  */
dvmComputeUtf16Hash(const u2 * utf16Str,int len)237 static inline u4 dvmComputeUtf16Hash(const u2* utf16Str, int len)
238 {
239     u4 hash = 0;
240 
241     while (len--)
242         hash = hash * 31 + *utf16Str++;
243 
244     return hash;
245 }
dvmComputeStringHash(const StringObject * strObj)246 u4 dvmComputeStringHash(const StringObject* strObj) {
247     ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj,
248                                 STRING_FIELDOFF_VALUE);
249     int offset, len;
250 
251     len = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_COUNT);
252     offset = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_OFFSET);
253 
254     return dvmComputeUtf16Hash((u2*) chars->contents + offset, len);
255 }
256 
257 /*
258  * Create a new java/lang/String object, using the string data in "utf8Str".
259  *
260  * The caller must call dvmReleaseTrackedAlloc() on the return value.
261  *
262  * Returns NULL and throws an exception on failure.
263  */
dvmCreateStringFromCstr(const char * utf8Str)264 StringObject* dvmCreateStringFromCstr(const char* utf8Str)
265 {
266     assert(utf8Str != NULL);
267     return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str));
268 }
269 
270 /*
271  * Create a java/lang/String from a C string, given its UTF-16 length
272  * (number of UTF-16 code points).
273  *
274  * The caller must call dvmReleaseTrackedAlloc() on the return value.
275  *
276  * Returns NULL and throws an exception on failure.
277  */
dvmCreateStringFromCstrAndLength(const char * utf8Str,u4 utf16Length)278 StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str,
279     u4 utf16Length)
280 {
281     StringObject* newObj;
282     ArrayObject* chars;
283     u4 hashCode = 0;
284 
285     //LOGV("Creating String from '%s'\n", utf8Str);
286     assert(utf8Str != NULL);
287 
288     if (gDvm.javaLangStringReady <= 0) {
289         if (!stringStartup())
290             return NULL;
291     }
292 
293     /* init before alloc */
294     if (!dvmIsClassInitialized(gDvm.classJavaLangString) &&
295         !dvmInitClass(gDvm.classJavaLangString))
296     {
297         return NULL;
298     }
299 
300     newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString,
301                 ALLOC_DEFAULT);
302     if (newObj == NULL)
303         return NULL;
304 
305     chars = dvmAllocPrimitiveArray('C', utf16Length, ALLOC_DEFAULT);
306     if (chars == NULL) {
307         dvmReleaseTrackedAlloc((Object*) newObj, NULL);
308         return NULL;
309     }
310     dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str);
311     hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length);
312 
313     dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
314         (Object*)chars);
315     dvmReleaseTrackedAlloc((Object*) chars, NULL);
316     dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, utf16Length);
317     dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
318     /* leave offset set to zero */
319 
320     /* debugging stuff */
321     //dvmDumpObject((Object*)newObj);
322     //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, utf16Length * 2,
323     //    kHexDumpMem);
324 
325     /* caller may need to dvmReleaseTrackedAlloc(newObj) */
326     return newObj;
327 }
328 
329 /*
330  * Create a new java/lang/String object, using the Unicode data.
331  */
dvmCreateStringFromUnicode(const u2 * unichars,int len)332 StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len)
333 {
334     StringObject* newObj;
335     ArrayObject* chars;
336     u4 hashCode = 0;
337 
338     /* we allow a null pointer if the length is zero */
339     assert(len == 0 || unichars != NULL);
340 
341     if (gDvm.javaLangStringReady <= 0) {
342         if (!stringStartup())
343             return NULL;
344     }
345 
346     /* init before alloc */
347     if (!dvmIsClassInitialized(gDvm.classJavaLangString) &&
348         !dvmInitClass(gDvm.classJavaLangString))
349     {
350         return NULL;
351     }
352 
353     newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString,
354         ALLOC_DEFAULT);
355     if (newObj == NULL)
356         return NULL;
357 
358     chars = dvmAllocPrimitiveArray('C', len, ALLOC_DEFAULT);
359     if (chars == NULL) {
360         dvmReleaseTrackedAlloc((Object*) newObj, NULL);
361         return NULL;
362     }
363     if (len > 0)
364         memcpy(chars->contents, unichars, len * sizeof(u2));
365     hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len);
366 
367     dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
368         (Object*)chars);
369     dvmReleaseTrackedAlloc((Object*) chars, NULL);
370     dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, len);
371     dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
372     /* leave offset set to zero */
373 
374     /* debugging stuff */
375     //dvmDumpObject((Object*)newObj);
376     //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, len*2, kHexDumpMem);
377 
378     /* caller must dvmReleaseTrackedAlloc(newObj) */
379     return newObj;
380 }
381 
382 /*
383  * Create a new C string from a java/lang/String object.
384  *
385  * Returns NULL if the object is NULL.
386  */
dvmCreateCstrFromString(StringObject * jstr)387 char* dvmCreateCstrFromString(StringObject* jstr)
388 {
389     char* newStr;
390     ArrayObject* chars;
391     int len, byteLen, offset;
392     const u2* data;
393 
394     assert(gDvm.javaLangStringReady > 0);
395 
396     if (jstr == NULL)
397         return NULL;
398 
399     len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
400     offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
401     chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
402                                 STRING_FIELDOFF_VALUE);
403     data = (const u2*) chars->contents + offset;
404     assert(offset + len <= (int) chars->length);
405 
406     byteLen = utf16_utf8ByteLen(data, len);
407     newStr = (char*) malloc(byteLen+1);
408     if (newStr == NULL)
409         return NULL;
410     convertUtf16ToUtf8(newStr, data, len);
411 
412     return newStr;
413 }
414 
415 /*
416  * Create a UTF-8 C string from a region of a java/lang/String.  (Used by
417  * the JNI GetStringUTFRegion call.)
418  */
dvmCreateCstrFromStringRegion(StringObject * jstr,int start,int len,char * buf)419 void dvmCreateCstrFromStringRegion(StringObject* jstr, int start, int len,
420     char* buf)
421 {
422     const u2* data;
423 
424     data = dvmStringChars(jstr) + start;
425     convertUtf16ToUtf8(buf, data, len);
426 }
427 
428 /*
429  * Compute the length, in modified UTF-8, of a java/lang/String object.
430  *
431  * Does not include the terminating null byte.
432  */
dvmStringUtf8ByteLen(StringObject * jstr)433 int dvmStringUtf8ByteLen(StringObject* jstr)
434 {
435     ArrayObject* chars;
436     int len, offset;
437     const u2* data;
438 
439     assert(gDvm.javaLangStringReady > 0);
440 
441     if (jstr == NULL)
442         return 0;       // should we throw something?  assert?
443 
444     len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
445     offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
446     chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
447                                 STRING_FIELDOFF_VALUE);
448     data = (const u2*) chars->contents + offset;
449     assert(offset + len <= (int) chars->length);
450 
451     return utf16_utf8ByteLen(data, len);
452 }
453 
454 /*
455  * Get the string's length.
456  */
dvmStringLen(StringObject * jstr)457 int dvmStringLen(StringObject* jstr)
458 {
459     return dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
460 }
461 
462 /*
463  * Get the char[] object from the String.
464  */
dvmStringCharArray(StringObject * jstr)465 ArrayObject* dvmStringCharArray(StringObject* jstr)
466 {
467     return (ArrayObject*) dvmGetFieldObject((Object*) jstr,
468                                 STRING_FIELDOFF_VALUE);
469 }
470 
471 /*
472  * Get the string's data.
473  */
dvmStringChars(StringObject * jstr)474 const u2* dvmStringChars(StringObject* jstr)
475 {
476     ArrayObject* chars;
477     int offset;
478 
479     offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
480     chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
481                                 STRING_FIELDOFF_VALUE);
482     return (const u2*) chars->contents + offset;
483 }
484 
485 
486 /*
487  * Compare two String objects.
488  *
489  * This is a dvmHashTableLookup() callback.  The function has already
490  * compared their hash values; we need to do a full compare to ensure
491  * that the strings really match.
492  */
dvmHashcmpStrings(const void * vstrObj1,const void * vstrObj2)493 int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2)
494 {
495     const StringObject* strObj1 = (const StringObject*) vstrObj1;
496     const StringObject* strObj2 = (const StringObject*) vstrObj2;
497     ArrayObject* chars1;
498     ArrayObject* chars2;
499     int len1, len2, offset1, offset2;
500 
501     assert(gDvm.javaLangStringReady > 0);
502 
503     /* get offset and length into char array; all values are in 16-bit units */
504     len1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_COUNT);
505     offset1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_OFFSET);
506     len2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_COUNT);
507     offset2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_OFFSET);
508     if (len1 != len2)
509         return len1 - len2;
510 
511     chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1,
512                                 STRING_FIELDOFF_VALUE);
513     chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2,
514                                 STRING_FIELDOFF_VALUE);
515 
516     /* damage here actually indicates a broken java/lang/String */
517     assert(offset1 + len1 <= (int) chars1->length);
518     assert(offset2 + len2 <= (int) chars2->length);
519 
520     return memcmp((const u2*) chars1->contents + offset1,
521                   (const u2*) chars2->contents + offset2,
522                   len1 * sizeof(u2));
523 }
524