• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * UTF-8 and Unicode string manipulation, plus java/lang/String convenience
19  * functions.
20  *
21  * In most cases we populate the fields in the String object directly,
22  * rather than going through an instance field lookup.
23  */
24 #include "Dalvik.h"
25 #include <stdlib.h>
26 
27 /*
28  * Allocate a new instance of the class String, performing first-use
29  * initialization of the class if necessary. Upon success, the
30  * returned value will have all its fields except hashCode already
31  * filled in, including a reference to a newly-allocated char[] for
32  * the contents, sized as given. Additionally, a reference to the
33  * chars array is stored to the pChars pointer. Callers must
34  * subsequently call dvmReleaseTrackedAlloc() on the result pointer.
35  * This function returns NULL on failure.
36  */
makeStringObject(u4 charsLength,ArrayObject ** pChars)37 static StringObject* makeStringObject(u4 charsLength, ArrayObject** pChars)
38 {
39     /*
40      * The String class should have already gotten found (but not
41      * necessarily initialized) before making it here. We assert it
42      * explicitly, since historically speaking, we have had bugs with
43      * regard to when the class String gets set up. The assert helps
44      * make any regressions easier to diagnose.
45      */
46     assert(gDvm.classJavaLangString != NULL);
47 
48     if (!dvmIsClassInitialized(gDvm.classJavaLangString)) {
49         /* Perform first-time use initialization of the class. */
50         if (!dvmInitClass(gDvm.classJavaLangString)) {
51             ALOGE("FATAL: Could not initialize class String");
52             dvmAbort();
53         }
54     }
55 
56     Object* result = dvmAllocObject(gDvm.classJavaLangString, ALLOC_DEFAULT);
57     if (result == NULL) {
58         return NULL;
59     }
60 
61     ArrayObject* chars = dvmAllocPrimitiveArray('C', charsLength, ALLOC_DEFAULT);
62     if (chars == NULL) {
63         dvmReleaseTrackedAlloc(result, NULL);
64         return NULL;
65     }
66 
67     dvmSetFieldInt(result, STRING_FIELDOFF_COUNT, charsLength);
68     dvmSetFieldObject(result, STRING_FIELDOFF_VALUE, (Object*) chars);
69     dvmReleaseTrackedAlloc((Object*) chars, NULL);
70     /* Leave offset and hashCode set to zero. */
71 
72     *pChars = chars;
73     return (StringObject*) result;
74 }
75 
76 /*
77  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
78  *
79  * This may or may not yield the same results as the java/lang/String
80  * computeHashCode() function.  (To make sure this doesn't get abused,
81  * I'm initializing the hash code to 1 so they *don't* match up.)
82  *
83  * It would be more correct to invoke dexGetUtf16FromUtf8() here and compute
84  * the hash with the result.  That way, if something encoded the same
85  * character in two different ways, the hash value would be the same.  For
86  * our purposes that isn't necessary.
87  */
dvmComputeUtf8Hash(const char * utf8Str)88 u4 dvmComputeUtf8Hash(const char* utf8Str)
89 {
90     u4 hash = 1;
91 
92     while (*utf8Str != '\0')
93         hash = hash * 31 + *utf8Str++;
94 
95     return hash;
96 }
97 
/*
 * Like "strlen", but for strings encoded with "modified" UTF-8.
 *
 * The value returned is the number of characters, which may or may not
 * be the same as the number of bytes. Each one-, two- or three-byte
 * sequence counts as one character.
 *
 * A multi-byte sequence that is cut short by the terminating NUL is
 * counted as one character and ends the scan, so the terminator is never
 * stepped over even when the input is malformed.
 *
 * (If this needs optimizing, try: mask against 0xa0, shift right 5,
 * get increment {1-3} from table of 8 values.)
 */
size_t dvmUtf8Len(const char* utf8Str)
{
    size_t len = 0;
    int ic;

    while ((ic = *utf8Str++) != '\0') {
        len++;

        if ((ic & 0x80) == 0) {
            /* one-byte encoding */
            continue;
        }

        /* two- or three-byte encoding: skip the first trailing byte */
        if (*utf8Str == '\0') {
            break;      /* truncated sequence; don't run past the end */
        }
        utf8Str++;

        if ((ic & 0x20) != 0) {
            /* three-byte encoding: skip the second trailing byte */
            if (*utf8Str == '\0') {
                break;  /* truncated sequence; don't run past the end */
            }
            utf8Str++;
        }
    }

    return len;
}
126 
127 /*
128  * Convert a "modified" UTF-8 string to UTF-16.
129  */
dvmConvertUtf8ToUtf16(u2 * utf16Str,const char * utf8Str)130 void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
131 {
132     while (*utf8Str != '\0')
133         *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str);
134 }
135 
136 /*
137  * Given a UTF-16 string, compute the length of the corresponding UTF-8
138  * string in bytes.
139  */
utf16_utf8ByteLen(const u2 * utf16Str,int len)140 static int utf16_utf8ByteLen(const u2* utf16Str, int len)
141 {
142     int utf8Len = 0;
143 
144     while (len--) {
145         unsigned int uic = *utf16Str++;
146 
147         /*
148          * The most common case is (uic > 0 && uic <= 0x7f).
149          */
150         if (uic == 0 || uic > 0x7f) {
151             if (uic > 0x07ff)
152                 utf8Len += 3;
153             else /*(uic > 0x7f || uic == 0) */
154                 utf8Len += 2;
155         } else
156             utf8Len++;
157     }
158     return utf8Len;
159 }
160 
161 /*
162  * Convert a UTF-16 string to UTF-8.
163  *
164  * Make sure you allocate "utf8Str" with the result of utf16_utf8ByteLen(),
165  * not just "len".
166  */
convertUtf16ToUtf8(char * utf8Str,const u2 * utf16Str,int len)167 static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len)
168 {
169     assert(len >= 0);
170 
171     while (len--) {
172         unsigned int uic = *utf16Str++;
173 
174         /*
175          * The most common case is (uic > 0 && uic <= 0x7f).
176          */
177         if (uic == 0 || uic > 0x7f) {
178             if (uic > 0x07ff) {
179                 *utf8Str++ = (uic >> 12) | 0xe0;
180                 *utf8Str++ = ((uic >> 6) & 0x3f) | 0x80;
181                 *utf8Str++ = (uic & 0x3f) | 0x80;
182             } else /*(uic > 0x7f || uic == 0)*/ {
183                 *utf8Str++ = (uic >> 6) | 0xc0;
184                 *utf8Str++ = (uic & 0x3f) | 0x80;
185             }
186         } else {
187             *utf8Str++ = uic;
188         }
189     }
190 
191     *utf8Str = '\0';
192 }
193 
194 /*
195  * Use the java/lang/String.computeHashCode() algorithm.
196  */
computeUtf16Hash(const u2 * utf16Str,size_t len)197 static inline u4 computeUtf16Hash(const u2* utf16Str, size_t len)
198 {
199     u4 hash = 0;
200 
201     while (len--)
202         hash = hash * 31 + *utf16Str++;
203 
204     return hash;
205 }
206 
dvmComputeStringHash(StringObject * strObj)207 u4 dvmComputeStringHash(StringObject* strObj) {
208     int hashCode = dvmGetFieldInt(strObj, STRING_FIELDOFF_HASHCODE);
209     if (hashCode != 0) {
210       return hashCode;
211     }
212     int len = dvmGetFieldInt(strObj, STRING_FIELDOFF_COUNT);
213     int offset = dvmGetFieldInt(strObj, STRING_FIELDOFF_OFFSET);
214     ArrayObject* chars =
215             (ArrayObject*) dvmGetFieldObject(strObj, STRING_FIELDOFF_VALUE);
216     hashCode = computeUtf16Hash((u2*)(void*)chars->contents + offset, len);
217     dvmSetFieldInt(strObj, STRING_FIELDOFF_HASHCODE, hashCode);
218     return hashCode;
219 }
220 
dvmCreateStringFromCstr(const char * utf8Str)221 StringObject* dvmCreateStringFromCstr(const char* utf8Str) {
222     assert(utf8Str != NULL);
223     return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str));
224 }
225 
dvmCreateStringFromCstr(const std::string & utf8Str)226 StringObject* dvmCreateStringFromCstr(const std::string& utf8Str) {
227     return dvmCreateStringFromCstr(utf8Str.c_str());
228 }
229 
230 /*
231  * Create a java/lang/String from a C string, given its UTF-16 length
232  * (number of UTF-16 code points).
233  *
234  * The caller must call dvmReleaseTrackedAlloc() on the return value.
235  *
236  * Returns NULL and throws an exception on failure.
237  */
dvmCreateStringFromCstrAndLength(const char * utf8Str,size_t utf16Length)238 StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str,
239     size_t utf16Length)
240 {
241     assert(utf8Str != NULL);
242 
243     ArrayObject* chars;
244     StringObject* newObj = makeStringObject(utf16Length, &chars);
245     if (newObj == NULL) {
246         return NULL;
247     }
248 
249     dvmConvertUtf8ToUtf16((u2*)(void*)chars->contents, utf8Str);
250 
251     u4 hashCode = computeUtf16Hash((u2*)(void*)chars->contents, utf16Length);
252     dvmSetFieldInt((Object*) newObj, STRING_FIELDOFF_HASHCODE, hashCode);
253 
254     return newObj;
255 }
256 
257 /*
258  * Create a new java/lang/String object, using the given Unicode data.
259  */
dvmCreateStringFromUnicode(const u2 * unichars,int len)260 StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len)
261 {
262     /* We allow a NULL pointer if the length is zero. */
263     assert(len == 0 || unichars != NULL);
264 
265     ArrayObject* chars;
266     StringObject* newObj = makeStringObject(len, &chars);
267     if (newObj == NULL) {
268         return NULL;
269     }
270 
271     if (len > 0) memcpy(chars->contents, unichars, len * sizeof(u2));
272 
273     u4 hashCode = computeUtf16Hash((u2*)(void*)chars->contents, len);
274     dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
275 
276     return newObj;
277 }
278 
279 /*
280  * Create a new C string from a java/lang/String object.
281  *
282  * Returns NULL if the object is NULL.
283  */
dvmCreateCstrFromString(const StringObject * jstr)284 char* dvmCreateCstrFromString(const StringObject* jstr)
285 {
286     assert(gDvm.classJavaLangString != NULL);
287     if (jstr == NULL) {
288         return NULL;
289     }
290 
291     int len = dvmGetFieldInt(jstr, STRING_FIELDOFF_COUNT);
292     int offset = dvmGetFieldInt(jstr, STRING_FIELDOFF_OFFSET);
293     ArrayObject* chars =
294             (ArrayObject*) dvmGetFieldObject(jstr, STRING_FIELDOFF_VALUE);
295     const u2* data = (const u2*)(void*)chars->contents + offset;
296     assert(offset + len <= (int) chars->length);
297 
298     int byteLen = utf16_utf8ByteLen(data, len);
299     char* newStr = (char*) malloc(byteLen+1);
300     if (newStr == NULL) {
301         return NULL;
302     }
303     convertUtf16ToUtf8(newStr, data, len);
304 
305     return newStr;
306 }
307 
dvmGetStringUtfRegion(const StringObject * jstr,int start,int len,char * buf)308 void dvmGetStringUtfRegion(const StringObject* jstr,
309         int start, int len, char* buf)
310 {
311     const u2* data = jstr->chars() + start;
312     convertUtf16ToUtf8(buf, data, len);
313 }
314 
utfLength() const315 int StringObject::utfLength() const
316 {
317     assert(gDvm.classJavaLangString != NULL);
318 
319     int len = dvmGetFieldInt(this, STRING_FIELDOFF_COUNT);
320     int offset = dvmGetFieldInt(this, STRING_FIELDOFF_OFFSET);
321     ArrayObject* chars =
322             (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
323     const u2* data = (const u2*)(void*)chars->contents + offset;
324     assert(offset + len <= (int) chars->length);
325 
326     return utf16_utf8ByteLen(data, len);
327 }
328 
length() const329 int StringObject::length() const
330 {
331     return dvmGetFieldInt(this, STRING_FIELDOFF_COUNT);
332 }
333 
array() const334 ArrayObject* StringObject::array() const
335 {
336     return (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
337 }
338 
chars() const339 const u2* StringObject::chars() const
340 {
341     int offset = dvmGetFieldInt(this, STRING_FIELDOFF_OFFSET);
342     ArrayObject* chars =
343             (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
344     return (const u2*)(void*)chars->contents + offset;
345 }
346 
347 
348 /*
349  * Compare two String objects.
350  *
351  * This is a dvmHashTableLookup() callback.  The function has already
352  * compared their hash values; we need to do a full compare to ensure
353  * that the strings really match.
354  */
dvmHashcmpStrings(const void * vstrObj1,const void * vstrObj2)355 int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2)
356 {
357     const StringObject* strObj1 = (const StringObject*) vstrObj1;
358     const StringObject* strObj2 = (const StringObject*) vstrObj2;
359 
360     assert(gDvm.classJavaLangString != NULL);
361 
362     /* get offset and length into char array; all values are in 16-bit units */
363     int len1 = dvmGetFieldInt(strObj1, STRING_FIELDOFF_COUNT);
364     int offset1 = dvmGetFieldInt(strObj1, STRING_FIELDOFF_OFFSET);
365     int len2 = dvmGetFieldInt(strObj2, STRING_FIELDOFF_COUNT);
366     int offset2 = dvmGetFieldInt(strObj2, STRING_FIELDOFF_OFFSET);
367     if (len1 != len2) {
368         return len1 - len2;
369     }
370 
371     ArrayObject* chars1 =
372             (ArrayObject*) dvmGetFieldObject(strObj1, STRING_FIELDOFF_VALUE);
373     ArrayObject* chars2 =
374             (ArrayObject*) dvmGetFieldObject(strObj2, STRING_FIELDOFF_VALUE);
375 
376     /* damage here actually indicates a broken java/lang/String */
377     assert(offset1 + len1 <= (int) chars1->length);
378     assert(offset2 + len2 <= (int) chars2->length);
379 
380     return memcmp((const u2*)(void*)chars1->contents + offset1,
381                   (const u2*)(void*)chars2->contents + offset2,
382                   len1 * sizeof(u2));
383 }
384 
dvmCreateStringArray(const std::vector<std::string> & strings)385 ArrayObject* dvmCreateStringArray(const std::vector<std::string>& strings) {
386     Thread* self = dvmThreadSelf();
387 
388     // Allocate an array to hold the String objects.
389     ClassObject* elementClass = dvmFindArrayClassForElement(gDvm.classJavaLangString);
390     ArrayObject* stringArray = dvmAllocArrayByClass(elementClass, strings.size(), ALLOC_DEFAULT);
391     if (stringArray == NULL) {
392         // Probably OOM.
393         assert(dvmCheckException(self));
394         return NULL;
395     }
396 
397     // Create the individual String objects and add them to the array.
398     for (size_t i = 0; i < strings.size(); i++) {
399         Object* str = (Object*) dvmCreateStringFromCstr(strings[i]);
400         if (str == NULL) {
401             // Probably OOM; drop out now.
402             assert(dvmCheckException(self));
403             dvmReleaseTrackedAlloc((Object*) stringArray, self);
404             return NULL;
405         }
406         dvmSetObjectArrayElement(stringArray, i, str);
407         /* stored in tracked array, okay to release */
408         dvmReleaseTrackedAlloc(str, self);
409     }
410 
411     return stringArray;
412 }
413