• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
/*
 * As per the Apache license requirements, this file has been modified
 * from its original state.
 *
 * Such modifications are Copyright (C) 2010 Ben Gruver, and are released
 * under the original license.
 */
24 
25 package org.jf.dexlib.Util;
26 
27 import java.io.IOException;
28 import java.io.Writer;
29 
30 /**
31  * Constants of type <code>CONSTANT_Utf8_info</code>.
32  */
33 public final class Utf8Utils {
34 
35 
36     /**
37      * Converts a string into its Java-style UTF-8 form. Java-style UTF-8
38      * differs from normal UTF-8 in the handling of character '\0' and
39      * surrogate pairs.
40      *
41      * @param string non-null; the string to convert
42      * @return non-null; the UTF-8 bytes for it
43      */
stringToUtf8Bytes(String string)44     public static byte[] stringToUtf8Bytes(String string) {
45         int len = string.length();
46         byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
47         int outAt = 0;
48 
49         for (int i = 0; i < len; i++) {
50             char c = string.charAt(i);
51             if ((c != 0) && (c < 0x80)) {
52                 bytes[outAt] = (byte) c;
53                 outAt++;
54             } else if (c < 0x800) {
55                 bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
56                 bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
57                 outAt += 2;
58             } else {
59                 bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
60                 bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
61                 bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
62                 outAt += 3;
63             }
64         }
65 
66         byte[] result = new byte[outAt];
67         System.arraycopy(bytes, 0, result, 0, outAt);
68         return result;
69     }
70 
71     private static char[] tempBuffer = null;
72 
73     /**
74      * Converts an array of UTF-8 bytes into a string.
75      *
76      * This method uses a global buffer to avoid having to allocate one every time, so it is *not* thread-safe
77      *
78      * @param bytes non-null; the bytes to convert
79      * @param start the start index of the utf8 string to convert
80      * @param length the length of the utf8 string to convert, not including any null-terminator that might be present
81      * @return non-null; the converted string
82      */
utf8BytesToString(byte[] bytes, int start, int length)83     public static String utf8BytesToString(byte[] bytes, int start, int length) {
84         if (tempBuffer == null || tempBuffer.length < length) {
85             tempBuffer = new char[length];
86         }
87         char[] chars = tempBuffer;
88         int outAt = 0;
89 
90         for (int at = start; length > 0; /*at*/) {
91             int v0 = bytes[at] & 0xFF;
92             char out;
93             switch (v0 >> 4) {
94                 case 0x00: case 0x01: case 0x02: case 0x03:
95                 case 0x04: case 0x05: case 0x06: case 0x07: {
96                     // 0XXXXXXX -- single-byte encoding
97                     length--;
98                     if (v0 == 0) {
99                         // A single zero byte is illegal.
100                         return throwBadUtf8(v0, at);
101                     }
102                     out = (char) v0;
103                     at++;
104                     break;
105                 }
106                 case 0x0c: case 0x0d: {
107                     // 110XXXXX -- two-byte encoding
108                     length -= 2;
109                     if (length < 0) {
110                         return throwBadUtf8(v0, at);
111                     }
112                     int v1 = bytes[at + 1] & 0xFF;
113                     if ((v1 & 0xc0) != 0x80) {
114                         return throwBadUtf8(v1, at + 1);
115                     }
116                     int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
117                     if ((value != 0) && (value < 0x80)) {
118                         /*
119                          * This should have been represented with
120                          * one-byte encoding.
121                          */
122                         return throwBadUtf8(v1, at + 1);
123                     }
124                     out = (char) value;
125                     at += 2;
126                     break;
127                 }
128                 case 0x0e: {
129                     // 1110XXXX -- three-byte encoding
130                     length -= 3;
131                     if (length < 0) {
132                         return throwBadUtf8(v0, at);
133                     }
134                     int v1 = bytes[at + 1] & 0xFF;
135                     if ((v1 & 0xc0) != 0x80) {
136                         return throwBadUtf8(v1, at + 1);
137                     }
138                     int v2 = bytes[at + 2] & 0xFF;
139                     if ((v2 & 0xc0) != 0x80) {
140                         return throwBadUtf8(v2, at + 2);
141                     }
142                     int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
143                         (v2 & 0x3f);
144                     if (value < 0x800) {
145                         /*
146                          * This should have been represented with one- or
147                          * two-byte encoding.
148                          */
149                         return throwBadUtf8(v2, at + 2);
150                     }
151                     out = (char) value;
152                     at += 3;
153                     break;
154                 }
155                 default: {
156                     // 10XXXXXX, 1111XXXX -- illegal
157                     return throwBadUtf8(v0, at);
158                 }
159             }
160             chars[outAt] = out;
161             outAt++;
162         }
163 
164         return new String(chars, 0, outAt);
165     }
166 
167     /**
168      * Helper for {@link #utf8BytesToString}, which throws the right
169      * exception for a bogus utf-8 byte.
170      *
171      * @param value the byte value
172      * @param offset the file offset
173      * @return never
174      * @throws IllegalArgumentException always thrown
175      */
throwBadUtf8(int value, int offset)176     private static String throwBadUtf8(int value, int offset) {
177         throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
178                                            " at offset " + Hex.u4(offset));
179     }
180 
writeEscapedChar(Writer writer, char c)181     public static void writeEscapedChar(Writer writer, char c) throws IOException {
182         if ((c >= ' ') && (c < 0x7f)) {
183             if ((c == '\'') || (c == '\"') || (c == '\\')) {
184                 writer.write('\\');
185             }
186             writer.write(c);
187             return;
188         } else if (c <= 0x7f) {
189             switch (c) {
190                 case '\n': writer.write("\\n"); return;
191                 case '\r': writer.write("\\r"); return;
192                 case '\t': writer.write("\\t"); return;
193             }
194         }
195 
196         writer.write("\\u");
197         writer.write(Character.forDigit(c >> 12, 16));
198         writer.write(Character.forDigit((c >> 8) & 0x0f, 16));
199         writer.write(Character.forDigit((c >> 4) & 0x0f, 16));
200         writer.write(Character.forDigit(c & 0x0f, 16));
201 
202     }
203 
writeEscapedString(Writer writer, String value)204     public static void writeEscapedString(Writer writer, String value) throws IOException {
205         for (int i = 0; i < value.length(); i++) {
206             char c = value.charAt(i);
207 
208             if ((c >= ' ') && (c < 0x7f)) {
209                 if ((c == '\'') || (c == '\"') || (c == '\\')) {
210                     writer.write('\\');
211                 }
212                 writer.write(c);
213                 continue;
214             } else if (c <= 0x7f) {
215                 switch (c) {
216                     case '\n': writer.write("\\n"); continue;
217                     case '\r': writer.write("\\r"); continue;
218                     case '\t': writer.write("\\t"); continue;
219                 }
220             }
221 
222             writer.write("\\u");
223             writer.write(Character.forDigit(c >> 12, 16));
224             writer.write(Character.forDigit((c >> 8) & 0x0f, 16));
225             writer.write(Character.forDigit((c >> 4) & 0x0f, 16));
226             writer.write(Character.forDigit(c & 0x0f, 16));
227         }
228     }
229 
escapeString(String value)230     public static String escapeString(String value) {
231         int len = value.length();
232         StringBuilder sb = new StringBuilder(len * 3 / 2);
233 
234         for (int i = 0; i < len; i++) {
235             char c = value.charAt(i);
236 
237             if ((c >= ' ') && (c < 0x7f)) {
238                 if ((c == '\'') || (c == '\"') || (c == '\\')) {
239                     sb.append('\\');
240                 }
241                 sb.append(c);
242                 continue;
243             } else if (c <= 0x7f) {
244                 switch (c) {
245                     case '\n': sb.append("\\n"); continue;
246                     case '\r': sb.append("\\r"); continue;
247                     case '\t': sb.append("\\t"); continue;
248                 }
249             }
250 
251             sb.append("\\u");
252             sb.append(Character.forDigit(c >> 12, 16));
253             sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
254             sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
255             sb.append(Character.forDigit(c & 0x0f, 16));
256         }
257 
258         return sb.toString();
259     }
260 }
261