1 package org.unicode.cldr.icu; 2 3 /** 4 * This Class is the Java representation of the ICU4C structure UDataInfo which can 5 * be found in <I>$icu4c_root</I>/source/common/unicode/udata.h 6 * 7 * <p> 8 * This class is used by LDML2ICUBinaryWriter to store information that must be written in the ICU Binary format. 9 * 10 * Note that if this data structure ever grows, the getSize() method must be updated. 11 * 12 * @author Brian Rower - June 2008 13 * 14 */ 15 public class UDataInfo { 16 17 /** 18 * Use to signify that this data is in Big Endian form. 19 * Currently the only mode supported in Java is Big Endian. 20 */ 21 public static final byte BIGENDIAN = 1; 22 23 /** 24 * charsetFamily is equal to this value when the platform is an ASCII based platform. 25 * Currently the only mode supported in Java is ASCII 26 * This mirrors the ICU4C version in <I>$icu4c_root</I>/source/common/unicode/utypes.h 27 */ 28 public static final byte ASCII_FAMILY = 0; 29 30 /** 31 * This is the value for setting sizeofUChar. Currently it is 16 bits (2 bytes). 32 * UChar is currently defined in <I>$icu4c_root</I>/source/common/unicode/umachine.h 33 */ 34 public static final byte SIZE_OF_UCHAR = 2; 35 36 /** 37 * This field stores the size of this data structure in memory. 38 * Add up the size of each part of it. 39 */ 40 public short size; 41 42 /** 43 * This field is currently unused, set it to zero. 44 */ 45 public short reservedWord; 46 47 /** 48 * This field is used to signify the Endian mode of a system. 49 * Choose from the static final int's provided in this class. 50 * In Java, there is only one possibility: Big Endian. 51 */ 52 public byte isBigEndian; 53 54 /** 55 * This field stores the character set which is being used. 56 */ 57 public byte charsetFamily; 58 59 /** 60 * Size of the UChar structure in C. 61 */ 62 public byte sizeofUChar; 63 64 /** 65 * This field is currently unused, set it to zero. 66 */ 67 public byte reservedByte; 68 69 /** 70 * This field stores an identifier for the data format. 71 * Array should be of length 4. 72 */ 73 public byte[] dataFormat; 74 75 /** 76 * This field stores the Format version. Array should be of length 4.<br> 77 * [0] = major<br> 78 * [1] = minor<br> 79 * [2] = milli<br> 80 * [3] = micro<br> 81 */ 82 public byte[] formatVersion; 83 84 /** 85 * This field stores the data version. Array should be of length 4.<br> 86 * [0] = major<br> 87 * [1] = minor<br> 88 * [2] = milli<br> 89 * [3] = micro<br> 90 */ 91 public byte[] dataVersion; 92 93 class IncorrectArrayLengthException extends Exception { 94 /** 95 * 96 */ 97 private static final long serialVersionUID = -3238261375903639881L; 98 IncorrectArrayLengthException(String message)99 IncorrectArrayLengthException(String message) { 100 super(message); 101 } 102 } 103 UDataInfo(short size, short reservedWord, byte isBigEndian, byte charsetFamily, byte sizeofUChar, byte reservedByte, byte[] dataFormat, byte[] formatVersion, byte[] dataVersion)104 public UDataInfo(short size, short reservedWord, byte isBigEndian, byte charsetFamily, byte sizeofUChar, 105 byte reservedByte, byte[] dataFormat, byte[] formatVersion, byte[] dataVersion) 106 throws IncorrectArrayLengthException { 107 if (dataFormat.length != 4) { 108 throw new IncorrectArrayLengthException("The byte array 'dataFormat' must be of length 4."); 109 } 110 if (formatVersion.length != 4) { 111 throw new IncorrectArrayLengthException("The byte array 'formatVersion' must be of length 4."); 112 } 113 if (dataVersion.length != 4) { 114 throw new IncorrectArrayLengthException("The byte array 'dataVersion' must be of length 4."); 115 } 116 this.size = size; 117 this.reservedWord = reservedWord; 118 this.isBigEndian = isBigEndian; 119 this.charsetFamily = charsetFamily; 120 this.sizeofUChar = sizeofUChar; 121 this.reservedByte = reservedByte; 122 this.dataFormat = dataFormat; 123 this.formatVersion = formatVersion; 124 this.dataVersion = dataVersion; 125 } 126 127 /** 128 * This method returns the size that this structure will occupy when written to binary file. 129 * byte = 1 byte <Br> 130 * short = 2 bytes<Br> 131 * int = 4 bytes<Br> 132 * long = 8 bytes<Br> 133 * float = 4 bytes<Br> 134 * double = 8 bytes<br> 135 * char = 2 bytes<br> 136 * 137 * @return The number of bytes that UDataInfo occupies 138 */ getSize()139 public static short getSize() { 140 /* 141 * number of short elements = 2 142 * number of byte elements = 4 143 * number of byte array elements of length 4 = 3 144 * 2*2 + 4*1 + 3*4 = 4 + 4 + 12 = 20 bytes 145 */ 146 return 20; 147 } 148 149 /** 150 * Returns a byte array representing the UDataStructure so that it can be written byte by byte. 151 * 152 * @returns a byte array of the contents of this UDataStructure. 153 */ getByteArray()154 public byte[] getByteArray() { 155 // This size may change, see get size method above. 156 byte[] b = new byte[20]; 157 byte[] sizeBytes = shortToBytes(size); 158 159 // write the size 160 b[0] = sizeBytes[0]; 161 b[1] = sizeBytes[1]; 162 163 // write the reserved word (a bunch of zeros) 164 b[2] = 0; 165 b[3] = 0; 166 167 // write isBigEndian 168 b[4] = isBigEndian; 169 170 // write charsetFamily 171 b[5] = charsetFamily; 172 173 // write sizeofUChar 174 b[6] = sizeofUChar; 175 176 // write reserved byte (some zeros) 177 b[7] = 0; 178 179 // write the dataFormat 180 b[8] = dataFormat[0]; 181 b[9] = dataFormat[1]; 182 b[10] = dataFormat[2]; 183 b[11] = dataFormat[3]; 184 185 // write the formatVersion 186 b[12] = formatVersion[0]; 187 b[13] = formatVersion[1]; 188 b[14] = formatVersion[2]; 189 b[15] = formatVersion[3]; 190 191 // write the dataVersion 192 b[16] = dataVersion[0]; 193 b[17] = dataVersion[1]; 194 b[18] = dataVersion[2]; 195 b[19] = dataVersion[3]; 196 197 return b; 198 } 199 200 /** 201 * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is upper byte. 202 * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return: 203 * [0] = 0110 0000 or 0x60 204 * [1] = 0110 1101 or 0x6D 205 */ shortToBytes(short x)206 private static byte[] shortToBytes(short x) { 207 byte[] b = new byte[2]; 208 byte mask = (byte) 0xFF; 209 b[1] = (byte) (x & mask); // bitwise and with the lower byte 210 b[0] = (byte) ((x >>> 8) & mask); // shift four bits to the right and fill with zeros, and then bitwise and with 211 // the lower byte 212 return b; 213 } 214 } 215