• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "descriptors_names.h"
18 
19 #include <algorithm>
20 
21 #include "android-base/stringprintf.h"
22 #include "android-base/strings.h"
23 
24 #include "base/macros.h"
25 #include "dex/utf-inl.h"
26 
27 namespace art {
28 
29 using android::base::StringAppendF;
30 
// Appends a human-readable rendering of the type descriptor `descriptor` to
// `*result`.
//
//   "[[La/b/C;" -> "a.b.C[][]"
//   "[[B"       -> "byte[][]"
//   "V"         -> "void"
//
// If the character following the leading '['s is neither 'L' nor one of the
// primitive type codes, the descriptor is appended verbatim (including any
// leading '['s). A reference descriptor without a trailing ';' is accepted and
// converted as-is; a bare "L" yields an empty type name.
void AppendPrettyDescriptor(const char* descriptor, std::string* result) {
  // Count the number of '['s to get the dimensionality.
  const char* c = descriptor;
  size_t dim = 0;
  while (*c == '[') {
    dim++;
    c++;
  }

  // Reference or primitive?
  bool primitive = false;
  if (*c == 'L') {
    // "[[La/b/C;" -> "a.b.C[][]".
    c++;  // Skip the 'L'.
  } else {
    primitive = true;
    // "[[B" -> "byte[][]".
    switch (*c) {
      case 'B':
        c = "byte";
        break;
      case 'C':
        c = "char";
        break;
      case 'D':
        c = "double";
        break;
      case 'F':
        c = "float";
        break;
      case 'I':
        c = "int";
        break;
      case 'J':
        c = "long";
        break;
      case 'S':
        c = "short";
        break;
      case 'Z':
        c = "boolean";
        break;
      case 'V':
        c = "void";
        break;  // Used when decoding return types.
      default:
        // Not something we know how to prettify; emit the input unchanged.
        result->append(descriptor);
        return;
    }
  }

  // At this point, 'c' is a string of the form "fully/qualified/Type;" or
  // "primitive". In the former case, rewrite the type with '.' instead of '/':
  std::string temp(c);
  if (!primitive) {
    std::replace(temp.begin(), temp.end(), '/', '.');
    // ...and remove the semicolon. The emptiness check matters: a truncated
    // descriptor such as "L" leaves 'temp' empty, and calling back() on an
    // empty string is undefined behaviour.
    if (!temp.empty() && temp.back() == ';') {
      temp.pop_back();
    }
  }
  result->append(temp);

  // Finally, add 'dim' "[]" pairs:
  for (size_t i = 0; i < dim; ++i) {
    result->append("[]");
  }
}
97 
PrettyDescriptor(const char * descriptor)98 std::string PrettyDescriptor(const char* descriptor) {
99   std::string result;
100   AppendPrettyDescriptor(descriptor, &result);
101   return result;
102 }
103 
// Converts a pretty type name back into descriptor form, e.g.
//   "some.pretty.Type[][]" -> "[[Lsome/pretty/Type;"
//   "int[]"                -> "[I"
// Everything before the first "[]" is taken as the element type; each "[]"
// occurrence in the input contributes one leading '['. Any element type that
// is not one of the primitive names is treated as a class name.
std::string InversePrettyDescriptor(const std::string& pretty_descriptor) {
  // The element type ends where the first "[]" begins (or at the end of the
  // string when there is no array suffix; substr() clamps npos accordingly).
  const size_t first_suffix = pretty_descriptor.find("[]");
  const std::string element_type = pretty_descriptor.substr(0, first_suffix);

  // Count the "[]" occurrences to get the number of dimensions.
  size_t rank = 0;
  for (size_t at = first_suffix;
       at != std::string::npos;
       at = pretty_descriptor.find("[]", at + 2)) {
    ++rank;
  }

  // One leading '[' per dimension.
  std::string descriptor(rank, '[');

  // Primitive names map to their single-character type codes.
  struct PrimitiveName {
    const char* name;
    char code;
  };
  static constexpr PrimitiveName kPrimitives[] = {
    { "byte", 'B' },
    { "char", 'C' },
    { "double", 'D' },
    { "float", 'F' },
    { "int", 'I' },
    { "long", 'J' },
    { "short", 'S' },
    { "boolean", 'Z' },
    { "void", 'V' },
  };
  for (const PrimitiveName& primitive : kPrimitives) {
    if (element_type == primitive.name) {
      descriptor += primitive.code;
      return descriptor;
    }
  }

  // Otherwise it is a class: wrap in 'L' ... ';' and use '/' as the separator.
  descriptor += 'L';
  for (const char ch : element_type) {
    descriptor += (ch == '.') ? '/' : ch;
  }
  descriptor += ';';
  return descriptor;
}
153 
GetJniShortName(const std::string & class_descriptor,const std::string & method)154 std::string GetJniShortName(const std::string& class_descriptor, const std::string& method) {
155   // Remove the leading 'L' and trailing ';'...
156   std::string class_name(class_descriptor);
157   CHECK_EQ(class_name[0], 'L') << class_name;
158   CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
159   class_name.erase(0, 1);
160   class_name.erase(class_name.size() - 1, 1);
161 
162   std::string short_name;
163   short_name += "Java_";
164   short_name += MangleForJni(class_name);
165   short_name += "_";
166   short_name += MangleForJni(method);
167   return short_name;
168 }
169 
170 // See http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/design.html#wp615 for the full rules.
MangleForJni(const std::string & s)171 std::string MangleForJni(const std::string& s) {
172   std::string result;
173   size_t char_count = CountModifiedUtf8Chars(s.c_str());
174   const char* cp = &s[0];
175   for (size_t i = 0; i < char_count; ++i) {
176     uint32_t ch = GetUtf16FromUtf8(&cp);
177     if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
178       result.push_back(ch);
179     } else if (ch == '.' || ch == '/') {
180       result += "_";
181     } else if (ch == '_') {
182       result += "_1";
183     } else if (ch == ';') {
184       result += "_2";
185     } else if (ch == '[') {
186       result += "_3";
187     } else {
188       const uint16_t leading = GetLeadingUtf16Char(ch);
189       const uint32_t trailing = GetTrailingUtf16Char(ch);
190 
191       StringAppendF(&result, "_0%04x", leading);
192       if (trailing != 0) {
193         StringAppendF(&result, "_0%04x", trailing);
194       }
195     }
196   }
197   return result;
198 }
199 
// Converts a dotted class name into descriptor form:
//   "java.lang.String"    -> "Ljava/lang/String;"
//   "[Ljava.lang.String;" -> "[Ljava/lang/String;"
// Names starting with '[' only have their '.'s replaced by '/'; the empty
// string is returned unchanged.
std::string DotToDescriptor(const char* class_name) {
  std::string converted(class_name);
  for (char& ch : converted) {
    if (ch == '.') {
      ch = '/';
    }
  }

  const bool needs_wrapping = !converted.empty() && converted[0] != '[';
  if (needs_wrapping) {
    converted.insert(converted.begin(), 'L');
    converted.push_back(';');
  }
  return converted;
}
208 
// Converts a descriptor into a dotted name:
//   "Ljava/lang/String;"  -> "java.lang.String"   ('L' and ';' stripped)
//   "[Ljava/lang/String;" -> "[Ljava.lang.String;" (only '/' replaced)
// Inputs of length zero or one are returned untouched.
std::string DescriptorToDot(const char* descriptor) {
  const size_t len = strlen(descriptor);
  if (len <= 1) {
    // Do nothing for non-class/array descriptors.
    return descriptor;
  }

  // Class descriptors lose their leading 'L' and trailing ';'; for anything
  // else (e.g. arrays) the 'L' and ';' remain intact.
  const bool is_class = (descriptor[0] == 'L') && (descriptor[len - 1] == ';');
  std::string dotted = is_class ? std::string(descriptor + 1, len - 2)
                                : std::string(descriptor);
  std::replace(dotted.begin(), dotted.end(), '/', '.');
  return dotted;
}
227 
// Strips the leading 'L' and trailing ';' from a class descriptor, leaving the
// '/' separators in place ("Ljava/lang/String;" -> "java/lang/String").
// Anything not bracketed by 'L' and ';' is returned unchanged.
std::string DescriptorToName(const char* descriptor) {
  const size_t len = strlen(descriptor);
  // The second test is only evaluated when the first character is 'L', which
  // implies the string is non-empty.
  const bool is_class = (descriptor[0] == 'L') && (descriptor[len - 1] == ';');
  return is_class ? std::string(descriptor + 1, len - 2) : std::string(descriptor);
}
236 
// Helper for IsValidPartOfMemberNameUtf8(): a 128-entry bit vector, one bit
// per low-ASCII character. Character c is valid iff bit (c & 0x1f) of word
// (c >> 5) is set.
static constexpr uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
  // 00..1f: low control characters; nothing valid.
  0x00000000u,
  // 20..3f: space, digits and symbols; valid: ' ', '0'..'9', '$', '-'.
  0x03ff2011u,
  // 40..5f: uppercase etc.; valid: 'A'..'Z', '_'.
  0x87fffffeu,
  // 60..7f: lowercase etc.; valid: 'a'..'z'.
  0x07fffffeu
};
244 
245 // Helper for IsValidPartOfMemberNameUtf8(); do not call directly.
246 COLD_ATTR
IsValidPartOfMemberNameUtf8Slow(const char ** pUtf8Ptr)247 static bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) {
248   /*
249    * It's a multibyte encoded character. Decode it and analyze. We
250    * accept anything that isn't:
251    *   - an improperly encoded low value
252    *   - an improper surrogate pair
253    *   - an encoded '\0'
254    *   - a C1 control character U+0080..U+009f
255    *   - a format character U+200b..U+200f, U+2028..U+202e
256    *   - a special character U+fff0..U+ffff
257    * Prior to DEX format version 040, we also excluded some of the Unicode
258    * space characters:
259    *   - U+00a0, U+2000..U+200a, U+202f
260    * This is all specified in the dex format document.
261    */
262 
263   const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
264   const uint16_t leading = GetLeadingUtf16Char(pair);
265 
266   // We have a surrogate pair resulting from a valid 4 byte UTF sequence.
267   // No further checks are necessary because 4 byte sequences span code
268   // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
269   // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
270   // the surrogate halves are valid and well formed in this instance.
271   if (GetTrailingUtf16Char(pair) != 0) {
272     return true;
273   }
274 
275 
276   // We've encountered a one, two or three byte UTF-8 sequence. The
277   // three byte UTF-8 sequence could be one half of a surrogate pair.
278   switch (leading >> 8) {
279     case 0x00:
280       // It's in the range that has C1 control characters.
281       return (leading >= 0x00a0);
282     case 0xd8:
283     case 0xd9:
284     case 0xda:
285     case 0xdb:
286       {
287         // We found a three byte sequence encoding one half of a surrogate.
288         // Look for the other half.
289         const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
290         const uint16_t trailing = GetLeadingUtf16Char(pair2);
291 
292         return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
293       }
294     case 0xdc:
295     case 0xdd:
296     case 0xde:
297     case 0xdf:
298       // It's a trailing surrogate, which is not valid at this point.
299       return false;
300     case 0x20:
301     case 0xff:
302       // It's in the range that has format characters and specials.
303       switch (leading & 0xfff8) {
304         case 0x2008:
305           return (leading <= 0x200a);
306         case 0x2028:
307           return (leading == 0x202f);
308         case 0xfff0:
309         case 0xfff8:
310           return false;
311       }
312       return true;
313     default:
314       return true;
315   }
316 
317   UNREACHABLE();
318 }
319 
320 /* Return whether the pointed-at modified-UTF-8 encoded character is
321  * valid as part of a member name, updating the pointer to point past
322  * the consumed character. This will consume two encoded UTF-16 code
323  * points if the character is encoded as a surrogate pair. Also, if
324  * this function returns false, then the given pointer may only have
325  * been partially advanced.
326  */
327 ALWAYS_INLINE
IsValidPartOfMemberNameUtf8(const char ** pUtf8Ptr)328 static bool IsValidPartOfMemberNameUtf8(const char** pUtf8Ptr) {
329   uint8_t c = (uint8_t) **pUtf8Ptr;
330   if (LIKELY(c <= 0x7f)) {
331     // It's low-ascii, so check the table.
332     uint32_t wordIdx = c >> 5;
333     uint32_t bitIdx = c & 0x1f;
334     (*pUtf8Ptr)++;
335     return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1 << bitIdx)) != 0;
336   }
337 
338   // It's a multibyte encoded character. Call a non-inline function
339   // for the heavy lifting.
340   return IsValidPartOfMemberNameUtf8Slow(pUtf8Ptr);
341 }
342 
IsValidMemberName(const char * s)343 bool IsValidMemberName(const char* s) {
344   bool angle_name = false;
345 
346   switch (*s) {
347     case '\0':
348       // The empty string is not a valid name.
349       return false;
350     case '<':
351       angle_name = true;
352       s++;
353       break;
354   }
355 
356   while (true) {
357     switch (*s) {
358       case '\0':
359         return !angle_name;
360       case '>':
361         return angle_name && s[1] == '\0';
362     }
363 
364     if (!IsValidPartOfMemberNameUtf8(&s)) {
365       return false;
366     }
367   }
368 }
369 
370 enum ClassNameType { kName, kDescriptor };
371 template<ClassNameType kType, char kSeparator>
IsValidClassName(const char * s)372 static bool IsValidClassName(const char* s) {
373   int arrayCount = 0;
374   while (*s == '[') {
375     arrayCount++;
376     s++;
377   }
378 
379   if (arrayCount > 255) {
380     // Arrays may have no more than 255 dimensions.
381     return false;
382   }
383 
384   ClassNameType type = kType;
385   if (type != kDescriptor && arrayCount != 0) {
386     /*
387      * If we're looking at an array of some sort, then it doesn't
388      * matter if what is being asked for is a class name; the
389      * format looks the same as a type descriptor in that case, so
390      * treat it as such.
391      */
392     type = kDescriptor;
393   }
394 
395   if (type == kDescriptor) {
396     /*
397      * We are looking for a descriptor. Either validate it as a
398      * single-character primitive type, or continue on to check the
399      * embedded class name (bracketed by "L" and ";").
400      */
401     switch (*(s++)) {
402     case 'B':
403     case 'C':
404     case 'D':
405     case 'F':
406     case 'I':
407     case 'J':
408     case 'S':
409     case 'Z':
410       // These are all single-character descriptors for primitive types.
411       return (*s == '\0');
412     case 'V':
413       // Non-array void is valid, but you can't have an array of void.
414       return (arrayCount == 0) && (*s == '\0');
415     case 'L':
416       // Class name: Break out and continue below.
417       break;
418     default:
419       // Oddball descriptor character.
420       return false;
421     }
422   }
423 
424   /*
425    * We just consumed the 'L' that introduces a class name as part
426    * of a type descriptor, or we are looking for an unadorned class
427    * name.
428    */
429 
430   bool sepOrFirst = true;  // first character or just encountered a separator.
431   for (;;) {
432     uint8_t c = (uint8_t) *s;
433     switch (c) {
434     case '\0':
435       /*
436        * Premature end for a type descriptor, but valid for
437        * a class name as long as we haven't encountered an
438        * empty component (including the degenerate case of
439        * the empty string "").
440        */
441       return (type == kName) && !sepOrFirst;
442     case ';':
443       /*
444        * Invalid character for a class name, but the
445        * legitimate end of a type descriptor. In the latter
446        * case, make sure that this is the end of the string
447        * and that it doesn't end with an empty component
448        * (including the degenerate case of "L;").
449        */
450       return (type == kDescriptor) && !sepOrFirst && (s[1] == '\0');
451     case '/':
452     case '.':
453       if (c != kSeparator) {
454         // The wrong separator character.
455         return false;
456       }
457       if (sepOrFirst) {
458         // Separator at start or two separators in a row.
459         return false;
460       }
461       sepOrFirst = true;
462       s++;
463       break;
464     default:
465       if (!IsValidPartOfMemberNameUtf8(&s)) {
466         return false;
467       }
468       sepOrFirst = false;
469       break;
470     }
471   }
472 }
473 
IsValidBinaryClassName(const char * s)474 bool IsValidBinaryClassName(const char* s) {
475   return IsValidClassName<kName, '.'>(s);
476 }
477 
IsValidJniClassName(const char * s)478 bool IsValidJniClassName(const char* s) {
479   return IsValidClassName<kName, '/'>(s);
480 }
481 
IsValidDescriptor(const char * s)482 bool IsValidDescriptor(const char* s) {
483   return IsValidClassName<kDescriptor, '/'>(s);
484 }
485 
PrettyDescriptor(Primitive::Type type)486 std::string PrettyDescriptor(Primitive::Type type) {
487   return PrettyDescriptor(Primitive::Descriptor(type));
488 }
489 
490 }  // namespace art
491