• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
2 
/**
 * Java representation of the ICU4C structure UDataInfo, which can be found in
 * <I>$icu4c_root</I>/source/common/unicode/udata.h
 *
 * <p>This class is used by LDML2ICUBinaryWriter to store information that must be written in the
 * ICU Binary format.
 *
 * <p>Note that if this data structure ever grows, the getSize() method must be updated.
 *
 * @author Brian Rower - June 2008
 */
public class UDataInfo {

    /**
     * Use to signify that this data is in Big Endian form. Currently the only mode supported in
     * Java is Big Endian.
     */
    public static final byte BIGENDIAN = 1;

    /**
     * charsetFamily is equal to this value when the platform is an ASCII based platform. Currently
     * the only mode supported in Java is ASCII. This mirrors the ICU4C version in
     * <I>$icu4c_root</I>/source/common/unicode/utypes.h
     */
    public static final byte ASCII_FAMILY = 0;

    /**
     * This is the value for setting sizeofUChar. Currently it is 16 bits (2 bytes). UChar is
     * currently defined in <I>$icu4c_root</I>/source/common/unicode/umachine.h
     */
    public static final byte SIZE_OF_UCHAR = 2;

    /** Required length of the dataFormat, formatVersion and dataVersion arrays. */
    private static final int FIELD_ARRAY_LENGTH = 4;

    /**
     * Serialized size in bytes: 2 shorts (2*2) + 4 bytes (4*1) + 3 byte arrays of length 4 (3*4)
     * = 4 + 4 + 12 = 20.
     */
    private static final short SERIALIZED_SIZE = 20;

    /**
     * This field stores the size of this data structure in memory. Add up the size of each part of
     * it.
     */
    public short size;

    /** This field is currently unused, set it to zero. */
    public short reservedWord;

    /**
     * This field is used to signify the Endian mode of a system. Choose from the static final
     * constants provided in this class. In Java, there is only one possibility: Big Endian.
     */
    public byte isBigEndian;

    /** This field stores the character set which is being used. */
    public byte charsetFamily;

    /** Size of the UChar structure in C. */
    public byte sizeofUChar;

    /** This field is currently unused, set it to zero. */
    public byte reservedByte;

    /** This field stores an identifier for the data format. Array should be of length 4. */
    public byte[] dataFormat;

    /**
     * This field stores the Format version. Array should be of length 4.<br>
     * [0] = major<br>
     * [1] = minor<br>
     * [2] = milli<br>
     * [3] = micro<br>
     */
    public byte[] formatVersion;

    /**
     * This field stores the data version. Array should be of length 4.<br>
     * [0] = major<br>
     * [1] = minor<br>
     * [2] = milli<br>
     * [3] = micro<br>
     */
    public byte[] dataVersion;

    /**
     * Thrown by the constructor when one of the byte-array arguments is not exactly 4 bytes long.
     *
     * <p>Declared {@code static} so that an instance does not carry a hidden reference to the
     * (partially constructed) enclosing {@code UDataInfo}.
     */
    static class IncorrectArrayLengthException extends Exception {
        private static final long serialVersionUID = -3238261375903639881L;

        IncorrectArrayLengthException(String message) {
            super(message);
        }
    }

    /**
     * Creates a UDataInfo from its individual parts. The arrays are stored as given (not copied).
     *
     * @param size value for the {@link #size} field
     * @param reservedWord value for the {@link #reservedWord} field
     * @param isBigEndian value for the {@link #isBigEndian} field
     * @param charsetFamily value for the {@link #charsetFamily} field
     * @param sizeofUChar value for the {@link #sizeofUChar} field
     * @param reservedByte value for the {@link #reservedByte} field
     * @param dataFormat data format identifier, must be of length 4
     * @param formatVersion format version, must be of length 4
     * @param dataVersion data version, must be of length 4
     * @throws IncorrectArrayLengthException if any of the three arrays is not of length 4
     */
    public UDataInfo(
            short size,
            short reservedWord,
            byte isBigEndian,
            byte charsetFamily,
            byte sizeofUChar,
            byte reservedByte,
            byte[] dataFormat,
            byte[] formatVersion,
            byte[] dataVersion)
            throws IncorrectArrayLengthException {
        // Validate in the same order as the parameters so the first offending array is reported.
        requireLengthFour(dataFormat, "dataFormat");
        requireLengthFour(formatVersion, "formatVersion");
        requireLengthFour(dataVersion, "dataVersion");

        this.size = size;
        this.reservedWord = reservedWord;
        this.isBigEndian = isBigEndian;
        this.charsetFamily = charsetFamily;
        this.sizeofUChar = sizeofUChar;
        this.reservedByte = reservedByte;
        this.dataFormat = dataFormat;
        this.formatVersion = formatVersion;
        this.dataVersion = dataVersion;
    }

    /** Throws IncorrectArrayLengthException unless {@code array} has exactly 4 elements. */
    private static void requireLengthFour(byte[] array, String name)
            throws IncorrectArrayLengthException {
        if (array.length != FIELD_ARRAY_LENGTH) {
            throw new IncorrectArrayLengthException(
                    "The byte array '" + name + "' must be of length 4.");
        }
    }

    /**
     * This method returns the size that this structure will occupy when written to binary file.
     * byte = 1 byte <br>
     * short = 2 bytes<br>
     * int = 4 bytes<br>
     * long = 8 bytes<br>
     * float = 4 bytes<br>
     * double = 8 bytes<br>
     * char = 2 bytes<br>
     *
     * @return The number of bytes that UDataInfo occupies
     */
    public static short getSize() {
        return SERIALIZED_SIZE;
    }

    /**
     * Returns a byte array representing this UDataInfo so that it can be written byte by byte.
     *
     * <p>Layout: [0-1] size (upper byte first), [2-3] reserved word, [4] isBigEndian, [5]
     * charsetFamily, [6] sizeofUChar, [7] reserved byte, [8-11] dataFormat, [12-15]
     * formatVersion, [16-19] dataVersion. The reserved slots are always written as zero,
     * regardless of the values of the reservedWord and reservedByte fields.
     *
     * @return a byte array of the contents of this UDataInfo.
     */
    public byte[] getByteArray() {
        // A freshly allocated array is zero-filled, which covers the reserved slots 2, 3 and 7.
        byte[] out = new byte[getSize()];

        byte[] sizeBytes = shortToBytes(size);
        out[0] = sizeBytes[0];
        out[1] = sizeBytes[1];

        out[4] = isBigEndian;
        out[5] = charsetFamily;
        out[6] = sizeofUChar;

        System.arraycopy(dataFormat, 0, out, 8, FIELD_ARRAY_LENGTH);
        System.arraycopy(formatVersion, 0, out, 12, FIELD_ARRAY_LENGTH);
        System.arraycopy(dataVersion, 0, out, 16, FIELD_ARRAY_LENGTH);

        return out;
    }

    /**
     * Takes a 16 bit number and returns a two byte array in big-endian order: the 0th element is
     * the upper byte, the 1st element is the lower byte. Ex: x = 28,000. In binary: 0110 1101 0110
     * 0000. This method will return: [0] = 0110 1101 or 0x6D, [1] = 0110 0000 or 0x60.
     */
    private static byte[] shortToBytes(short x) {
        byte[] b = new byte[2];
        // Narrowing to byte keeps only the low 8 bits, so no explicit mask is needed.
        b[0] = (byte) (x >>> 8); // shift the upper byte down into the low 8 bits
        b[1] = (byte) x; // lower byte
        return b;
    }
}
211