• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.io.IOException;
11 
12 import com.ibm.icu.lang.UCharacter;
13 import com.ibm.icu.text.UTF16;
14 import com.ibm.icu.util.ICUUncheckedIOException;
15 
16 /**
17  * @author markdavis
18  */
19 // TODO optimize this
20 public class Utf8StringByteConverter extends StringByteConverter {
21     char lead = 0;
22 
23     @Override
toBytes(char ch, byte[] output, int bytePosition)24     public int toBytes(char ch, byte[] output, int bytePosition) {
25         // we may have state, if we were processing a supplemental char
26         if (lead != 0) {
27             if (UTF16.isTrailSurrogate(ch)) {
28                 int cp = UCharacter.getCodePoint(lead, ch);
29                 output[bytePosition++] = (byte) (0xF0 | (cp >>> 18));
30                 output[bytePosition++] = (byte) (0x80 | ((cp >>> 12) & 0x3F));
31                 output[bytePosition++] = (byte) (0x80 | ((cp >>> 6) & 0x3F));
32                 output[bytePosition++] = (byte) (0x80 | (cp & 0x3F));
33                 lead = 0;
34                 return bytePosition;
35             }
36             // write lead
37             output[bytePosition++] = (byte) (0xE0 | (lead >>> 12));
38             output[bytePosition++] = (byte) (0x80 | ((lead >>> 6) & 0x3F));
39             output[bytePosition++] = (byte) (0x80 | (lead & 0x3F));
40             lead = 0;
41         }
42         if (ch < 0x80) {
43             output[bytePosition++] = (byte) ch;
44         } else if (ch < 0x800) {
45             output[bytePosition++] = (byte) (0xC0 | (ch >>> 6));
46             output[bytePosition++] = (byte) (0x80 | (ch & 0x3F));
47         } else if (ch >= 0xD800 && ch < 0xDC00) {
48             lead = ch;
49         } else {
50             output[bytePosition++] = (byte) (0xE0 | (ch >>> 12));
51             output[bytePosition++] = (byte) (0x80 | ((ch >>> 6) & 0x3F));
52             output[bytePosition++] = (byte) (0x80 | (ch & 0x3F));
53         }
54         return bytePosition;
55     }
56 
57     @Override
toBytes(byte[] output, int bytePosition)58     public int toBytes(byte[] output, int bytePosition) {
59         if (lead != 0) {
60             output[bytePosition++] = (byte) (0xE0 | (lead >>> 12));
61             output[bytePosition++] = (byte) (0x80 | ((lead >>> 6) & 0x3F));
62             output[bytePosition++] = (byte) (0x80 | (lead & 0x3F));
63             lead = 0;
64         }
65         return bytePosition;
66     }
67 
68     @Override
getMaxBytesPerChar()69     public int getMaxBytesPerChar() {
70         return 4;
71     }
72 
73     @Override
fromBytes(byte[] input, int byteStart, int byteLength, Appendable result)74     public Appendable fromBytes(byte[] input, int byteStart, int byteLength,
75         Appendable result) {
76         try {
77             while (byteStart < byteLength) {
78                 char b = (char) (input[byteStart++] & 0xFF);
79                 if (b < 0x80) {
80                     // fall through
81                 } else if (b < 0xE0) {
82                     b &= 0x1F;
83                     b <<= 6;
84                     b |= (char) (input[byteStart++] & 0x3F);
85                 } else if (b < 0xF0) {
86                     b &= 0xF;
87                     b <<= 6;
88                     b |= (char) (input[byteStart++] & 0x3F);
89                     b <<= 6;
90                     b |= (char) (input[byteStart++] & 0x3F);
91                 } else {
92                     // surrogate
93                     int cp = (b & 0x7) << 6;
94                     cp |= (char) (input[byteStart++] & 0x3F);
95                     cp <<= 6;
96                     cp |= (char) (input[byteStart++] & 0x3F);
97                     cp <<= 6;
98                     cp |= (char) (input[byteStart++] & 0x3F);
99                     result.append(UTF16.getLeadSurrogate(cp));
100                     b = UTF16.getTrailSurrogate(cp);
101                 }
102                 result.append(b);
103             }
104             return result;
105         } catch (IOException e) {
106             throw new ICUUncheckedIOException("Internal error", e);
107         }
108     }
109 }