1 package org.unicode.cldr.icu; 2 3 /** 4 * This Class is the Java representation of the ICU4C structure UDataInfo which can be found in 5 * <I>$icu4c_root</I>/source/common/unicode/udata.h 6 * 7 * <p>This class is used by LDML2ICUBinaryWriter to store information that must be written in the 8 * ICU Binary format. 9 * 10 * <p>Note that if this data structure ever grows, the getSize() method must be updated. 11 * 12 * @author Brian Rower - June 2008 13 */ 14 public class UDataInfo { 15 16 /** 17 * Use to signify that this data is in Big Endian form. Currently the only mode supported in 18 * Java is Big Endian. 19 */ 20 public static final byte BIGENDIAN = 1; 21 22 /** 23 * charsetFamily is equal to this value when the platform is an ASCII based platform. Currently 24 * the only mode supported in Java is ASCII This mirrors the ICU4C version in 25 * <I>$icu4c_root</I>/source/common/unicode/utypes.h 26 */ 27 public static final byte ASCII_FAMILY = 0; 28 29 /** 30 * This is the value for setting sizeofUChar. Currently it is 16 bits (2 bytes). UChar is 31 * currently defined in <I>$icu4c_root</I>/source/common/unicode/umachine.h 32 */ 33 public static final byte SIZE_OF_UCHAR = 2; 34 35 /** 36 * This field stores the size of this data structure in memory. Add up the size of each part of 37 * it. 38 */ 39 public short size; 40 41 /** This field is currently unused, set it to zero. */ 42 public short reservedWord; 43 44 /** 45 * This field is used to signify the Endian mode of a system. Choose from the static final int's 46 * provided in this class. In Java, there is only one possibility: Big Endian. 47 */ 48 public byte isBigEndian; 49 50 /** This field stores the character set which is being used. */ 51 public byte charsetFamily; 52 53 /** Size of the UChar structure in C. */ 54 public byte sizeofUChar; 55 56 /** This field is currently unused, set it to zero. */ 57 public byte reservedByte; 58 59 /** This field stores an identifier for the data format. Array should be of length 4. */ 60 public byte[] dataFormat; 61 62 /** 63 * This field stores the Format version. Array should be of length 4.<br> 64 * [0] = major<br> 65 * [1] = minor<br> 66 * [2] = milli<br> 67 * [3] = micro<br> 68 */ 69 public byte[] formatVersion; 70 71 /** 72 * This field stores the data version. Array should be of length 4.<br> 73 * [0] = major<br> 74 * [1] = minor<br> 75 * [2] = milli<br> 76 * [3] = micro<br> 77 */ 78 public byte[] dataVersion; 79 80 class IncorrectArrayLengthException extends Exception { 81 /** */ 82 private static final long serialVersionUID = -3238261375903639881L; 83 IncorrectArrayLengthException(String message)84 IncorrectArrayLengthException(String message) { 85 super(message); 86 } 87 } 88 UDataInfo( short size, short reservedWord, byte isBigEndian, byte charsetFamily, byte sizeofUChar, byte reservedByte, byte[] dataFormat, byte[] formatVersion, byte[] dataVersion)89 public UDataInfo( 90 short size, 91 short reservedWord, 92 byte isBigEndian, 93 byte charsetFamily, 94 byte sizeofUChar, 95 byte reservedByte, 96 byte[] dataFormat, 97 byte[] formatVersion, 98 byte[] dataVersion) 99 throws IncorrectArrayLengthException { 100 if (dataFormat.length != 4) { 101 throw new IncorrectArrayLengthException( 102 "The byte array 'dataFormat' must be of length 4."); 103 } 104 if (formatVersion.length != 4) { 105 throw new IncorrectArrayLengthException( 106 "The byte array 'formatVersion' must be of length 4."); 107 } 108 if (dataVersion.length != 4) { 109 throw new IncorrectArrayLengthException( 110 "The byte array 'dataVersion' must be of length 4."); 111 } 112 this.size = size; 113 this.reservedWord = reservedWord; 114 this.isBigEndian = isBigEndian; 115 this.charsetFamily = charsetFamily; 116 this.sizeofUChar = sizeofUChar; 117 this.reservedByte = reservedByte; 118 this.dataFormat = dataFormat; 119 this.formatVersion = formatVersion; 120 this.dataVersion = dataVersion; 121 } 122 123 /** 124 * This method returns the size that this structure will occupy when written to binary file. 125 * byte = 1 byte <br> 126 * short = 2 bytes<br> 127 * int = 4 bytes<br> 128 * long = 8 bytes<br> 129 * float = 4 bytes<br> 130 * double = 8 bytes<br> 131 * char = 2 bytes<br> 132 * 133 * @return The number of bytes that UDataInfo occupies 134 */ getSize()135 public static short getSize() { 136 /* 137 * number of short elements = 2 138 * number of byte elements = 4 139 * number of byte array elements of length 4 = 3 140 * 2*2 + 4*1 + 3*4 = 4 + 4 + 12 = 20 bytes 141 */ 142 return 20; 143 } 144 145 /** 146 * Returns a byte array representing the UDataStructure so that it can be written byte by byte. 147 * 148 * @returns a byte array of the contents of this UDataStructure. 149 */ getByteArray()150 public byte[] getByteArray() { 151 // This size may change, see get size method above. 152 byte[] b = new byte[20]; 153 byte[] sizeBytes = shortToBytes(size); 154 155 // write the size 156 b[0] = sizeBytes[0]; 157 b[1] = sizeBytes[1]; 158 159 // write the reserved word (a bunch of zeros) 160 b[2] = 0; 161 b[3] = 0; 162 163 // write isBigEndian 164 b[4] = isBigEndian; 165 166 // write charsetFamily 167 b[5] = charsetFamily; 168 169 // write sizeofUChar 170 b[6] = sizeofUChar; 171 172 // write reserved byte (some zeros) 173 b[7] = 0; 174 175 // write the dataFormat 176 b[8] = dataFormat[0]; 177 b[9] = dataFormat[1]; 178 b[10] = dataFormat[2]; 179 b[11] = dataFormat[3]; 180 181 // write the formatVersion 182 b[12] = formatVersion[0]; 183 b[13] = formatVersion[1]; 184 b[14] = formatVersion[2]; 185 b[15] = formatVersion[3]; 186 187 // write the dataVersion 188 b[16] = dataVersion[0]; 189 b[17] = dataVersion[1]; 190 b[18] = dataVersion[2]; 191 b[19] = dataVersion[3]; 192 193 return b; 194 } 195 196 /** 197 * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is 198 * upper byte. Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return: [0] = 199 * 0110 0000 or 0x60 [1] = 0110 1101 or 0x6D 200 */ shortToBytes(short x)201 private static byte[] shortToBytes(short x) { 202 byte[] b = new byte[2]; 203 byte mask = (byte) 0xFF; 204 b[1] = (byte) (x & mask); // bitwise and with the lower byte 205 b[0] = (byte) ((x >>> 8) & mask); // shift four bits to the right and fill with zeros, and 206 // then bitwise and with 207 // the lower byte 208 return b; 209 } 210 } 211