1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.dex; 18 19 import com.android.dex.util.ByteInput; 20 import java.io.UTFDataFormatException; 21 22 /** 23 * Modified UTF-8 as described in the dex file format spec. 24 * 25 * <p>Derived from libcore's MUTF-8 encoder at java.nio.charset.ModifiedUtf8. 26 */ 27 public final class Mutf8 { Mutf8()28 private Mutf8() {} 29 30 /** 31 * Decodes bytes from {@code in} into {@code out} until a delimiter 0x00 is 32 * encountered. Returns a new string containing the decoded characters. 33 */ decode(ByteInput in, char[] out)34 public static String decode(ByteInput in, char[] out) throws UTFDataFormatException { 35 int s = 0; 36 while (true) { 37 char a = (char) (in.readByte() & 0xff); 38 if (a == 0) { 39 return new String(out, 0, s); 40 } 41 out[s] = a; 42 if (a < '\u0080') { 43 s++; 44 } else if ((a & 0xe0) == 0xc0) { 45 int b = in.readByte() & 0xff; 46 if ((b & 0xC0) != 0x80) { 47 throw new UTFDataFormatException("bad second byte"); 48 } 49 out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F)); 50 } else if ((a & 0xf0) == 0xe0) { 51 int b = in.readByte() & 0xff; 52 int c = in.readByte() & 0xff; 53 if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) { 54 throw new UTFDataFormatException("bad second or third byte"); 55 } 56 out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F)); 57 } else { 58 throw new UTFDataFormatException("bad byte"); 59 } 60 } 61 } 62 63 /** 64 * Returns the number of bytes the modified UTF8 representation of 's' would take. 65 */ countBytes(String s, boolean shortLength)66 private static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { 67 long result = 0; 68 final int length = s.length(); 69 for (int i = 0; i < length; ++i) { 70 char ch = s.charAt(i); 71 if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. 72 ++result; 73 } else if (ch <= 2047) { 74 result += 2; 75 } else { 76 result += 3; 77 } 78 if (shortLength && result > 65535) { 79 throw new UTFDataFormatException("String more than 65535 UTF bytes long"); 80 } 81 } 82 return result; 83 } 84 85 /** 86 * Encodes the modified UTF-8 bytes corresponding to {@code s} into {@code 87 * dst}, starting at {@code offset}. 88 */ encode(byte[] dst, int offset, String s)89 public static void encode(byte[] dst, int offset, String s) { 90 final int length = s.length(); 91 for (int i = 0; i < length; i++) { 92 char ch = s.charAt(i); 93 if (ch != 0 && ch <= 127) { // U+0000 uses two bytes. 94 dst[offset++] = (byte) ch; 95 } else if (ch <= 2047) { 96 dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6))); 97 dst[offset++] = (byte) (0x80 | (0x3f & ch)); 98 } else { 99 dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12))); 100 dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6))); 101 dst[offset++] = (byte) (0x80 | (0x3f & ch)); 102 } 103 } 104 } 105 106 /** 107 * Returns an array containing the <i>modified UTF-8</i> form of {@code s}. 108 */ encode(String s)109 public static byte[] encode(String s) throws UTFDataFormatException { 110 int utfCount = (int) countBytes(s, true); 111 byte[] result = new byte[utfCount]; 112 encode(result, 0, s); 113 return result; 114 } 115 } 116