1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.io.IOException; 11 12 import com.ibm.icu.lang.UCharacter; 13 import com.ibm.icu.text.UTF16; 14 import com.ibm.icu.util.ICUUncheckedIOException; 15 16 /** 17 * @author markdavis 18 */ 19 // TODO optimize this 20 public class Utf8StringByteConverter extends StringByteConverter { 21 char lead = 0; 22 23 @Override toBytes(char ch, byte[] output, int bytePosition)24 public int toBytes(char ch, byte[] output, int bytePosition) { 25 // we may have state, if we were processing a supplemental char 26 if (lead != 0) { 27 if (UTF16.isTrailSurrogate(ch)) { 28 int cp = UCharacter.getCodePoint(lead, ch); 29 output[bytePosition++] = (byte) (0xF0 | (cp >>> 18)); 30 output[bytePosition++] = (byte) (0x80 | ((cp >>> 12) & 0x3F)); 31 output[bytePosition++] = (byte) (0x80 | ((cp >>> 6) & 0x3F)); 32 output[bytePosition++] = (byte) (0x80 | (cp & 0x3F)); 33 lead = 0; 34 return bytePosition; 35 } 36 // write lead 37 output[bytePosition++] = (byte) (0xE0 | (lead >>> 12)); 38 output[bytePosition++] = (byte) (0x80 | ((lead >>> 6) & 0x3F)); 39 output[bytePosition++] = (byte) (0x80 | (lead & 0x3F)); 40 lead = 0; 41 } 42 if (ch < 0x80) { 43 output[bytePosition++] = (byte) ch; 44 } else if (ch < 0x800) { 45 output[bytePosition++] = (byte) (0xC0 | (ch >>> 6)); 46 output[bytePosition++] = (byte) (0x80 | (ch & 0x3F)); 47 } else if (ch >= 0xD800 && ch < 0xDC00) { 48 lead = ch; 49 } else { 50 output[bytePosition++] = (byte) (0xE0 | (ch >>> 12)); 51 output[bytePosition++] = (byte) (0x80 | ((ch >>> 6) & 0x3F)); 52 output[bytePosition++] = (byte) (0x80 | (ch & 0x3F)); 53 } 54 return bytePosition; 55 } 56 57 @Override toBytes(byte[] output, int bytePosition)58 public int toBytes(byte[] output, int bytePosition) { 59 if (lead != 0) { 60 output[bytePosition++] = (byte) (0xE0 | (lead >>> 12)); 61 output[bytePosition++] = (byte) (0x80 | ((lead >>> 6) & 0x3F)); 62 output[bytePosition++] = (byte) (0x80 | (lead & 0x3F)); 63 lead = 0; 64 } 65 return bytePosition; 66 } 67 68 @Override getMaxBytesPerChar()69 public int getMaxBytesPerChar() { 70 return 4; 71 } 72 73 @Override fromBytes(byte[] input, int byteStart, int byteLength, Appendable result)74 public Appendable fromBytes(byte[] input, int byteStart, int byteLength, 75 Appendable result) { 76 try { 77 while (byteStart < byteLength) { 78 char b = (char) (input[byteStart++] & 0xFF); 79 if (b < 0x80) { 80 // fall through 81 } else if (b < 0xE0) { 82 b &= 0x1F; 83 b <<= 6; 84 b |= (char) (input[byteStart++] & 0x3F); 85 } else if (b < 0xF0) { 86 b &= 0xF; 87 b <<= 6; 88 b |= (char) (input[byteStart++] & 0x3F); 89 b <<= 6; 90 b |= (char) (input[byteStart++] & 0x3F); 91 } else { 92 // surrogate 93 int cp = (b & 0x7) << 6; 94 cp |= (char) (input[byteStart++] & 0x3F); 95 cp <<= 6; 96 cp |= (char) (input[byteStart++] & 0x3F); 97 cp <<= 6; 98 cp |= (char) (input[byteStart++] & 0x3F); 99 result.append(UTF16.getLeadSurrogate(cp)); 100 b = UTF16.getTrailSurrogate(cp); 101 } 102 result.append(b); 103 } 104 return result; 105 } catch (IOException e) { 106 throw new ICUUncheckedIOException("Internal error", e); 107 } 108 } 109 }