• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.dx.rop.cst;
18 
19 import com.android.dx.rop.type.Type;
20 import com.android.dx.util.ByteArray;
21 import com.android.dx.util.Hex;
22 
23 /**
24  * Constants of type {@code CONSTANT_Utf8_info} or {@code CONSTANT_String_info}.
25  */
26 public final class CstString extends TypedConstant {
27     /**
28      * {@code non-null;} instance representing {@code ""}, that is, the
29      * empty string
30      */
31     public static final CstString EMPTY_STRING = new CstString("");
32 
33     /** {@code non-null;} the UTF-8 value as a string */
34     private final String string;
35 
36     /** {@code non-null;} the UTF-8 value as bytes */
37     private final ByteArray bytes;
38 
39     /**
40      * Converts a string into its MUTF-8 form. MUTF-8 differs from normal UTF-8
41      * in the handling of character '\0' and surrogate pairs.
42      *
43      * @param string {@code non-null;} the string to convert
44      * @return {@code non-null;} the UTF-8 bytes for it
45      */
stringToUtf8Bytes(String string)46     public static byte[] stringToUtf8Bytes(String string) {
47         int len = string.length();
48         byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
49         int outAt = 0;
50 
51         for (int i = 0; i < len; i++) {
52             char c = string.charAt(i);
53             if ((c != 0) && (c < 0x80)) {
54                 bytes[outAt] = (byte) c;
55                 outAt++;
56             } else if (c < 0x800) {
57                 bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
58                 bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
59                 outAt += 2;
60             } else {
61                 bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
62                 bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
63                 bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
64                 outAt += 3;
65             }
66         }
67 
68         byte[] result = new byte[outAt];
69         System.arraycopy(bytes, 0, result, 0, outAt);
70         return result;
71     }
72 
73     /**
74      * Converts an array of UTF-8 bytes into a string.
75      *
76      * @param bytes {@code non-null;} the bytes to convert
77      * @return {@code non-null;} the converted string
78      */
utf8BytesToString(ByteArray bytes)79     public static String utf8BytesToString(ByteArray bytes) {
80         int length = bytes.size();
81         char[] chars = new char[length]; // This is sized to avoid a realloc.
82         int outAt = 0;
83 
84         for (int at = 0; length > 0; /*at*/) {
85             int v0 = bytes.getUnsignedByte(at);
86             char out;
87             switch (v0 >> 4) {
88                 case 0x00: case 0x01: case 0x02: case 0x03:
89                 case 0x04: case 0x05: case 0x06: case 0x07: {
90                     // 0XXXXXXX -- single-byte encoding
91                     length--;
92                     if (v0 == 0) {
93                         // A single zero byte is illegal.
94                         return throwBadUtf8(v0, at);
95                     }
96                     out = (char) v0;
97                     at++;
98                     break;
99                 }
100                 case 0x0c: case 0x0d: {
101                     // 110XXXXX -- two-byte encoding
102                     length -= 2;
103                     if (length < 0) {
104                         return throwBadUtf8(v0, at);
105                     }
106                     int v1 = bytes.getUnsignedByte(at + 1);
107                     if ((v1 & 0xc0) != 0x80) {
108                         return throwBadUtf8(v1, at + 1);
109                     }
110                     int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
111                     if ((value != 0) && (value < 0x80)) {
112                         /*
113                          * This should have been represented with
114                          * one-byte encoding.
115                          */
116                         return throwBadUtf8(v1, at + 1);
117                     }
118                     out = (char) value;
119                     at += 2;
120                     break;
121                 }
122                 case 0x0e: {
123                     // 1110XXXX -- three-byte encoding
124                     length -= 3;
125                     if (length < 0) {
126                         return throwBadUtf8(v0, at);
127                     }
128                     int v1 = bytes.getUnsignedByte(at + 1);
129                     if ((v1 & 0xc0) != 0x80) {
130                         return throwBadUtf8(v1, at + 1);
131                     }
132                     int v2 = bytes.getUnsignedByte(at + 2);
133                     if ((v1 & 0xc0) != 0x80) {
134                         return throwBadUtf8(v2, at + 2);
135                     }
136                     int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
137                         (v2 & 0x3f);
138                     if (value < 0x800) {
139                         /*
140                          * This should have been represented with one- or
141                          * two-byte encoding.
142                          */
143                         return throwBadUtf8(v2, at + 2);
144                     }
145                     out = (char) value;
146                     at += 3;
147                     break;
148                 }
149                 default: {
150                     // 10XXXXXX, 1111XXXX -- illegal
151                     return throwBadUtf8(v0, at);
152                 }
153             }
154             chars[outAt] = out;
155             outAt++;
156         }
157 
158         return new String(chars, 0, outAt);
159     }
160 
161     /**
162      * Helper for {@link #utf8BytesToString}, which throws the right
163      * exception for a bogus utf-8 byte.
164      *
165      * @param value the byte value
166      * @param offset the file offset
167      * @return never
168      * @throws IllegalArgumentException always thrown
169      */
throwBadUtf8(int value, int offset)170     private static String throwBadUtf8(int value, int offset) {
171         throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
172                                            " at offset " + Hex.u4(offset));
173     }
174 
175     /**
176      * Constructs an instance from a {@code String}.
177      *
178      * @param string {@code non-null;} the UTF-8 value as a string
179      */
CstString(String string)180     public CstString(String string) {
181         if (string == null) {
182             throw new NullPointerException("string == null");
183         }
184 
185         this.string = string.intern();
186         this.bytes = new ByteArray(stringToUtf8Bytes(string));
187     }
188 
189     /**
190      * Constructs an instance from some UTF-8 bytes.
191      *
192      * @param bytes {@code non-null;} array of the UTF-8 bytes
193      */
CstString(ByteArray bytes)194     public CstString(ByteArray bytes) {
195         if (bytes == null) {
196             throw new NullPointerException("bytes == null");
197         }
198 
199         this.bytes = bytes;
200         this.string = utf8BytesToString(bytes).intern();
201     }
202 
203     /** {@inheritDoc} */
204     @Override
equals(Object other)205     public boolean equals(Object other) {
206         if (!(other instanceof CstString)) {
207             return false;
208         }
209 
210         return string.equals(((CstString) other).string);
211     }
212 
213     /** {@inheritDoc} */
214     @Override
hashCode()215     public int hashCode() {
216         return string.hashCode();
217     }
218 
219     /** {@inheritDoc} */
220     @Override
compareTo0(Constant other)221     protected int compareTo0(Constant other) {
222         return string.compareTo(((CstString) other).string);
223     }
224 
225     /** {@inheritDoc} */
226     @Override
toString()227     public String toString() {
228         return "string{\"" + toHuman() + "\"}";
229     }
230 
231     /** {@inheritDoc} */
232     @Override
typeName()233     public String typeName() {
234         return "utf8";
235     }
236 
237     /** {@inheritDoc} */
238     @Override
isCategory2()239     public boolean isCategory2() {
240         return false;
241     }
242 
243     /** {@inheritDoc} */
toHuman()244     public String toHuman() {
245         int len = string.length();
246         StringBuilder sb = new StringBuilder(len * 3 / 2);
247 
248         for (int i = 0; i < len; i++) {
249             char c = string.charAt(i);
250             if ((c >= ' ') && (c < 0x7f)) {
251                 if ((c == '\'') || (c == '\"') || (c == '\\')) {
252                     sb.append('\\');
253                 }
254                 sb.append(c);
255             } else if (c <= 0x7f) {
256                 switch (c) {
257                     case '\n': sb.append("\\n"); break;
258                     case '\r': sb.append("\\r"); break;
259                     case '\t': sb.append("\\t"); break;
260                     default: {
261                         /*
262                          * Represent the character as an octal escape.
263                          * If the next character is a valid octal
264                          * digit, disambiguate by using the
265                          * three-digit form.
266                          */
267                         char nextChar =
268                             (i < (len - 1)) ? string.charAt(i + 1) : 0;
269                         boolean displayZero =
270                             (nextChar >= '0') && (nextChar <= '7');
271                         sb.append('\\');
272                         for (int shift = 6; shift >= 0; shift -= 3) {
273                             char outChar = (char) (((c >> shift) & 7) + '0');
274                             if ((outChar != '0') || displayZero) {
275                                 sb.append(outChar);
276                                 displayZero = true;
277                             }
278                         }
279                         if (! displayZero) {
280                             // Ironic edge case: The original value was 0.
281                             sb.append('0');
282                         }
283                         break;
284                     }
285                 }
286             } else {
287                 sb.append("\\u");
288                 sb.append(Character.forDigit(c >> 12, 16));
289                 sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
290                 sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
291                 sb.append(Character.forDigit(c & 0x0f, 16));
292             }
293         }
294 
295         return sb.toString();
296     }
297 
298     /**
299      * Gets the value as a human-oriented string, surrounded by double
300      * quotes.
301      *
302      * @return {@code non-null;} the quoted string
303      */
toQuoted()304     public String toQuoted() {
305         return '\"' + toHuman() + '\"';
306     }
307 
308     /**
309      * Gets the value as a human-oriented string, surrounded by double
310      * quotes, but ellipsizes the result if it is longer than the given
311      * maximum length
312      *
313      * @param maxLength {@code >= 5;} the maximum length of the string to return
314      * @return {@code non-null;} the quoted string
315      */
toQuoted(int maxLength)316     public String toQuoted(int maxLength) {
317         String string = toHuman();
318         int length = string.length();
319         String ellipses;
320 
321         if (length <= (maxLength - 2)) {
322             ellipses = "";
323         } else {
324             string = string.substring(0, maxLength - 5);
325             ellipses = "...";
326         }
327 
328         return '\"' + string + ellipses + '\"';
329     }
330 
331     /**
332      * Gets the UTF-8 value as a string.
333      * The returned string is always already interned.
334      *
335      * @return {@code non-null;} the UTF-8 value as a string
336      */
getString()337     public String getString() {
338         return string;
339     }
340 
341     /**
342      * Gets the UTF-8 value as UTF-8 encoded bytes.
343      *
344      * @return {@code non-null;} an array of the UTF-8 bytes
345      */
getBytes()346     public ByteArray getBytes() {
347         return bytes;
348     }
349 
350     /**
351      * Gets the size of this instance as UTF-8 code points. That is,
352      * get the number of bytes in the UTF-8 encoding of this instance.
353      *
354      * @return {@code >= 0;} the UTF-8 size
355      */
getUtf8Size()356     public int getUtf8Size() {
357         return bytes.size();
358     }
359 
360     /**
361      * Gets the size of this instance as UTF-16 code points. That is,
362      * get the number of 16-bit chars in the UTF-16 encoding of this
363      * instance. This is the same as the {@code length} of the
364      * Java {@code String} representation of this instance.
365      *
366      * @return {@code >= 0;} the UTF-16 size
367      */
getUtf16Size()368     public int getUtf16Size() {
369         return string.length();
370     }
371 
getType()372     public Type getType() {
373         return Type.STRING;
374     }
375 }
376