/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * As per the Apache license requirements, this file has been modified
 * from its original state.
 *
 * Such modifications are Copyright (C) 2010 Ben Gruver, and are released
 * under the original license
 */

package org.jf.dexlib.Util;

import java.io.IOException;
import java.io.Writer;

/**
 * Static helpers for converting between strings and Java-style ("modified")
 * UTF-8 byte sequences, and for writing strings with non-printable and
 * non-ASCII characters escaped.
 */
public final class Utf8Utils {

    /**
     * Converts a string into its Java-style UTF-8 form. Java-style UTF-8
     * differs from normal UTF-8 in the handling of character '\0' and
     * surrogate pairs: '\0' is emitted as the two-byte sequence
     * <code>0xc0 0x80</code>, and every UTF-16 code unit (including each half
     * of a surrogate pair) is encoded on its own, so no four-byte sequences
     * are ever produced.
     *
     * @param string non-null; the string to convert
     * @return non-null; the UTF-8 bytes for it
     */
    public static byte[] stringToUtf8Bytes(String string) {
        int len = string.length();
        // Worst case is three bytes per char, so this avoids having to reallocate.
        byte[] bytes = new byte[len * 3];
        int outAt = 0;

        for (int i = 0; i < len; i++) {
            char c = string.charAt(i);
            if ((c != 0) && (c < 0x80)) {
                // 0XXXXXXX -- single-byte encoding (never used for '\0')
                bytes[outAt] = (byte) c;
                outAt++;
            } else if (c < 0x800) {
                // 110XXXXX 10XXXXXX -- two-byte encoding (also used for '\0')
                bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
                bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
                outAt += 2;
            } else {
                // 1110XXXX 10XXXXXX 10XXXXXX -- three-byte encoding
                bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
                bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
                bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
                outAt += 3;
            }
        }

        // Trim the worst-case buffer down to the bytes actually written.
        byte[] result = new byte[outAt];
        System.arraycopy(bytes, 0, result, 0, outAt);
        return result;
    }

    /**
     * Scratch buffer shared by every call to {@link #utf8BytesToString}; it
     * is replaced by a larger one whenever a longer input is seen.
     */
    private static char[] tempBuffer = null;

    /**
     * Converts an array of UTF-8 bytes into a string.
     *
     * This method uses a global buffer to avoid having to allocate one every time, so it is *not* thread-safe
     *
     * Only one-, two- and three-byte sequences are accepted. A lone zero
     * byte, a stray continuation byte, a four-byte lead byte, a sequence that
     * runs past <code>length</code>, and an overlong encoding (other than the
     * two-byte form of '\0') are all rejected. The range given by
     * <code>start</code> and <code>length</code> is not checked against the
     * size of <code>bytes</code>.
     *
     * @param bytes non-null; the bytes to convert
     * @param start the start index of the utf8 string to convert
     * @param length the length of the utf8 string to convert, not including any null-terminator that might be present
     * @return non-null; the converted string
     * @throws IllegalArgumentException if a malformed sequence is found
     */
    public static String utf8BytesToString(byte[] bytes, int start, int length) {
        // Each decoded char consumes at least one byte, so "length" chars always suffice.
        if (tempBuffer == null || tempBuffer.length < length) {
            tempBuffer = new char[length];
        }
        char[] chars = tempBuffer;
        int outAt = 0;

        // "length" counts the bytes still to be consumed; "at" is advanced inside each case.
        for (int at = start; length > 0; /*at*/) {
            int v0 = bytes[at] & 0xFF;
            char out;
            switch (v0 >> 4) {
                case 0x00: case 0x01: case 0x02: case 0x03:
                case 0x04: case 0x05: case 0x06: case 0x07: {
                    // 0XXXXXXX -- single-byte encoding
                    length--;
                    if (v0 == 0) {
                        // A single zero byte is illegal.
                        return throwBadUtf8(v0, at);
                    }
                    out = (char) v0;
                    at++;
                    break;
                }
                case 0x0c: case 0x0d: {
                    // 110XXXXX -- two-byte encoding
                    length -= 2;
                    if (length < 0) {
                        return throwBadUtf8(v0, at);
                    }
                    int v1 = bytes[at + 1] & 0xFF;
                    if ((v1 & 0xc0) != 0x80) {
                        return throwBadUtf8(v1, at + 1);
                    }
                    int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
                    if ((value != 0) && (value < 0x80)) {
                        /*
                         * This should have been represented with
                         * one-byte encoding.
                         */
                        return throwBadUtf8(v1, at + 1);
                    }
                    out = (char) value;
                    at += 2;
                    break;
                }
                case 0x0e: {
                    // 1110XXXX -- three-byte encoding
                    length -= 3;
                    if (length < 0) {
                        return throwBadUtf8(v0, at);
                    }
                    int v1 = bytes[at + 1] & 0xFF;
                    if ((v1 & 0xc0) != 0x80) {
                        return throwBadUtf8(v1, at + 1);
                    }
                    int v2 = bytes[at + 2] & 0xFF;
                    if ((v2 & 0xc0) != 0x80) {
                        return throwBadUtf8(v2, at + 2);
                    }
                    int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
                        (v2 & 0x3f);
                    if (value < 0x800) {
                        /*
                         * This should have been represented with one- or
                         * two-byte encoding.
                         */
                        return throwBadUtf8(v2, at + 2);
                    }
                    out = (char) value;
                    at += 3;
                    break;
                }
                default: {
                    // 10XXXXXX, 1111XXXX -- illegal
                    return throwBadUtf8(v0, at);
                }
            }
            chars[outAt] = out;
            outAt++;
        }

        return new String(chars, 0, outAt);
    }

    /**
     * Helper for {@link #utf8BytesToString}, which throws the right
     * exception for a bogus utf-8 byte.
     *
     * @param value the byte value
     * @param offset the file offset
     * @return never
     * @throws IllegalArgumentException always thrown
     */
    private static String throwBadUtf8(int value, int offset) {
        throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
                                           " at offset " + Hex.u4(offset));
    }

    /**
     * Writes a single character to the given writer, escaping it if needed.
     *
     * Characters from ' ' up to (but not including) 0x7f are written as-is,
     * with a backslash added in front of a single quote, a double quote or a
     * backslash. Newline, carriage return and tab are written as
     * <code>\n</code>, <code>\r</code> and <code>\t</code>. Every other
     * character is written as <code>\\u</code> followed by four hex digits.
     *
     * @param writer non-null; the writer to write to
     * @param c the character to write
     * @throws IOException if the writer throws it
     */
    public static void writeEscapedChar(Writer writer, char c) throws IOException {
        if ((c >= ' ') && (c < 0x7f)) {
            if ((c == '\'') || (c == '\"') || (c == '\\')) {
                writer.write('\\');
            }
            writer.write(c);
            return;
        } else if (c <= 0x7f) {
            // Control characters and DEL: only these three have a short form.
            switch (c) {
                case '\n': writer.write("\\n"); return;
                case '\r': writer.write("\\r"); return;
                case '\t': writer.write("\\t"); return;
            }
        }

        // Everything else is written as a four-digit unicode escape.
        writer.write("\\u");
        writer.write(Character.forDigit(c >> 12, 16));
        writer.write(Character.forDigit((c >> 8) & 0x0f, 16));
        writer.write(Character.forDigit((c >> 4) & 0x0f, 16));
        writer.write(Character.forDigit(c & 0x0f, 16));
    }

    /**
     * Writes a string to the given writer, escaping each character exactly
     * as {@link #writeEscapedChar} does.
     *
     * @param writer non-null; the writer to write to
     * @param value non-null; the string to write
     * @throws IOException if the writer throws it
     */
    public static void writeEscapedString(Writer writer, String value) throws IOException {
        for (int i = 0; i < value.length(); i++) {
            writeEscapedChar(writer, value.charAt(i));
        }
    }

    /**
     * Returns an escaped copy of the given string, using the same escaping
     * rules as {@link #writeEscapedChar}.
     *
     * @param value non-null; the string to escape
     * @return non-null; the escaped string
     */
    public static String escapeString(String value) {
        int len = value.length();
        // Leave some room for escapes so that short strings rarely need to grow.
        StringBuilder sb = new StringBuilder(len * 3 / 2);

        for (int i = 0; i < len; i++) {
            appendEscapedChar(sb, value.charAt(i));
        }

        return sb.toString();
    }

    /**
     * Appends one character to the given builder, escaped by the same rules
     * that {@link #writeEscapedChar} applies.
     *
     * @param sb non-null; the builder to append to
     * @param c the character to append
     */
    private static void appendEscapedChar(StringBuilder sb, char c) {
        if ((c >= ' ') && (c < 0x7f)) {
            if ((c == '\'') || (c == '\"') || (c == '\\')) {
                sb.append('\\');
            }
            sb.append(c);
            return;
        } else if (c <= 0x7f) {
            // Control characters and DEL: only these three have a short form.
            switch (c) {
                case '\n': sb.append("\\n"); return;
                case '\r': sb.append("\\r"); return;
                case '\t': sb.append("\\t"); return;
            }
        }

        // Everything else is appended as a four-digit unicode escape.
        sb.append("\\u");
        sb.append(Character.forDigit(c >> 12, 16));
        sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
        sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
        sb.append(Character.forDigit(c & 0x0f, 16));
    }
}