• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2003-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 
8 package org.unicode.cldr.icu;
9 
10 import java.io.File;
11 import java.io.FileNotFoundException;
12 import java.io.FileOutputStream;
13 import java.io.IOException;
14 import java.util.Hashtable;
15 
16 import com.ibm.icu.lang.UCharacter;
17 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
18 import com.ibm.icu.text.UTF16;
19 
20 /**
21  * The LDML2ICUBinaryWriter class is a set of methods which can be used
22  * to generate Binary (.res) files in the ICU Binary format.
23  *
24  * @author Brian Rower - June 2008
25  *
26  */
27 public class LDML2ICUBinaryWriter {
28     /**
29      * This string is the copyright to be written into the file.
30      * In the C version, can be found in <I>icu4c_root</I>/source/common/unicode/uversion.h
31      */
32     private static final String COPYRIGHT = " Copyright (C) 2012, International Business Machines Corporation and others. All Rights Reserved. ";
33 
34     public static int written = 0;
35 
36     /**
37      * Magic numbers!!!!
38      */
39     private static final byte MAGIC1 = (byte) 0xda;
40     private static final byte MAGIC2 = 0x27;
41 
42     private static boolean INCLUDE_COPYRIGHT = false;
43     /**
44      * The number of bytes it takes to write magic number 1.
45      */
46     private static final short BYTES_TAKEN_BY_MAGIC1 = 1;
47 
48     /**
49      * The number of bytes it takes to write magic number 2;
50      */
51     private static final short BYTES_TAKEN_BY_MAGIC2 = 1;
52 
53     /**
54      * The number of bytes that it takes to write the size of the header.
55      */
56     private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2;
57 
58     /**
59      * The charsets to be used when encoding strings.
60      */
61     public static final String CHARSET8 = "UTF-8";
62     public static final String CHARSET16 = "UTF-16BE";
63 
64     /**
     * The number of bytes that each character takes up. This is dependent on the encoding (see CHARSET8 above).
66      */
67     private static final int BYTES_PER_UTF8_CHAR = 1;
68 
69     /**
70      * Numeric constants for special elements.
71      */
72     private static final int SPECIAL_NONE = 0;
73     private static final int SPECIAL_COLLATIONS = 1;
74     private static final int SPECIAL_COLLATIONELEMENTS = 2;
75     private static final int SPECIAL_DEPENDENCY = 3;
76     private static final int SPECIAL_TRANSLITERATOR = 4;
77 
78     /**
79      * Numeric constants for types of resource items.
80      *
81      * @see ures_getType
82      * @stable ICU 2.0
83      */
84 
85     // **************************** ENUM Below is ported from C. See ures.h ***********************
86 
87     /** Resource type constant for "no resource". @stable ICU 2.6 */
88     public static final int URES_NONE = -1;
89 
90     /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
91     public static final int URES_STRING = 0;
92 
93     /** Resource type constant for binary data. @stable ICU 2.6 */
94     public static final int URES_BINARY = 1;
95 
96     /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
97     public static final int URES_TABLE = 2;
98 
99     /**
100      * Resource type constant for aliases;
101      * internally stores a string which identifies the actual resource
102      * storing the data (can be in a different resource bundle).
103      * Resolved internally before delivering the actual resource through the API.
104      *
105      * @stable ICU 2.6
106      */
107     public static final int URES_ALIAS = 3;
108 
109     /**
110      * Internal use only.
111      * Alternative resource type constant for tables of key-value pairs.
112      * Never returned by ures_getType().
113      *
114      * @internal
115      */
116     public static final int URES_TABLE32 = 4;
117 
118     /**
119      * Resource type constant for a single 28-bit integer, interpreted as
120      * signed or unsigned by the ures_getInt() or ures_getUInt() function.
121      *
122      * @see ures_getInt
123      * @see ures_getUInt
124      * @stable ICU 2.6
125      */
126     public static final int URES_INT = 7;
127 
128     /** Resource type constant for arrays of resources. @stable ICU 2.6 */
129     public static final int URES_ARRAY = 8;
130 
131     /**
132      * Resource type constant for vectors of 32-bit integers.
133      *
134      * @see ures_getIntVector
135      * @stable ICU 2.6
136      */
137     public static final int URES_INT_VECTOR = 14;
138 
139     public static final int URES_LIMIT = 16;
140 
141     /*
142      * The enum below is ported from C. See uresdata.h
143      *
144      * It is used as index references for the array which will be written.
145      */
146     /* [0] contains URES_INDEX_TOP==the length of indexes[] */
147     private static final int URES_INDEX_LENGTH = 0;
148     /* [1] contains the top of the strings, same as the bottom of resources, rounded up */
149     private static final int URES_INDEX_STRINGS_TOP = 1;
150     /* [2] contains the top of all resources */
151     private static final int URES_INDEX_RESOURCES_TOP = 2;
152     /* [3] contains the top of the bundle, in case it were ever different from [2] */
153     private static final int URES_INDEX_BUNDLE_TOP = 3;
154     /* [4] max. length of any table */
155     private static final int URES_INDEX_MAX_TABLE_LENGTH = 4;
156     /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
157     // private static final int URES_INDEX_ATTRIBUTES = 5;
158     /* This one is the length of the array */
159     private static final int URES_INDEX_TOP = 6;
160 
161     // must be set if writing transliteration
162     private static Hashtable<String, String> ruleStringsHash = null;
163 
main()164     public static void main() {
165 
166     }
167 
168     /**
169      * This method is called upon the top of an ICUResourceWriter.Resource
170      * in order to write the whole Resource tree into binary format.
171      *
172      * @param resTop
173      *            The top of the resource tree that you would like written to file. This
174      *            object should be a ICUResourceWriter.ResourceTable.
175      * @param outDir
176      *            A string pointing to the path of the output directory.
177      * @param outFile
178      *            The name of the output file. If filename has an extension other than .res
     *            (ex: .txt) this method will strip that extension and replace it with .res.
180      */
writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile)181     public static void writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile) {
182         String fileName = "";
183         int usedOffset = 0;
184         String directoryPath = "";
185         FileOutputStream out;
186         UDataInfo info;
187         byte[] dataFormat;
188         byte[] formatVersion;
189         byte[] dataVersion;
190         byte[] padding;
191 
192         // Do some checks on the file name
193         // if it has a period in it...get rid of everything after the period
194         if (outFile.indexOf('.') > -1) {
195             fileName = outFile.substring(0, outFile.indexOf('.'));
196             if (fileName.length() == 0) {
197                 printError(outFile + " is not a valid file name.");
198                 System.exit(1);
199             }
200             fileName = fileName + ".res";
201         } else {
202             fileName = outFile + ".res";
203         }
204         // add the .res part to the file name
205 
206         // do some checks on the directory path
207         // replace all backslashes with forward slashes
208         directoryPath = outDir.replace('\\', '/');
209 
210         // if the path does not end in a slash, then we'll add one
211         if (directoryPath.charAt(directoryPath.length() - 1) != '/') {
212             directoryPath = directoryPath + "/";
213         }
214 
215         // create UDataInfo
216         // Data format is "ResB"
217         dataFormat = new byte[4];
218         dataFormat[0] = 0x52; // R
219         dataFormat[1] = 0x65; // e
220         dataFormat[2] = 0x73; // s
221         dataFormat[3] = 0x42; // B
222 
223         // Format version is 1.2.0.0
224         formatVersion = new byte[4];
225         formatVersion[0] = 1;
226         formatVersion[1] = 2;
227         formatVersion[2] = 0;
228         formatVersion[3] = 0;
229 
230         // data version is 1.4.0.0
231         dataVersion = new byte[4];
232         dataVersion[0] = 1;
233         dataVersion[1] = 4;
234         dataVersion[2] = 0;
235         dataVersion[3] = 0;
236 
237         // now that the file and directory name are formatted, lets try to create an output stream
238         try {
239             System.out.println("Creating file: " + directoryPath + fileName);
240             File f = new File(directoryPath, fileName);
241             out = new FileOutputStream(f);
242 
243             info = new UDataInfo(UDataInfo.getSize(), (short) 0, UDataInfo.BIGENDIAN, UDataInfo.ASCII_FAMILY,
244                 UDataInfo.SIZE_OF_UCHAR, (byte) 0, dataFormat, formatVersion, dataVersion);
245 
246             // this method goes through the tree and looks for a table named CollationElements or Collations, and adds
247             // the
248             // appropriate data to the tree
249             dealWithSpecialElements(resTop, outDir);
250 
251             // before we do anything with the resources, sort them
252             resTop.sort();
253 
254             // call writeBinaryHeader.
255             writeBinaryHeader(out, info, COPYRIGHT);
256 
257             usedOffset = writeKeyString(out, resTop);
258 
259             // Call writeBinary on the top of the Resource tree
260 
261             usedOffset = resTop.writeBinary(out, usedOffset);
262             padding = createPadding(pad32(usedOffset));
263             if (padding != null) {
264                 out.write(padding);
265                 written += padding.length;
266             }
267             out.close();
268             System.out.println("Finished writing binary.");
269         } catch (FileNotFoundException e) {
270             printError(directoryPath + fileName + " could not be opened, please ensure the correct path is given.");
271             e.printStackTrace();
272             System.exit(1);
273         } catch (SecurityException e) {
274             printError("access denied: " + directoryPath + fileName);
275             e.printStackTrace();
276             System.exit(1);
277         } catch (Exception e) {
278             printError(e.getMessage());
279             System.exit(1);
280         }
281     }
282 
getSpecialType(ICUResourceWriter.Resource res)283     private static int getSpecialType(ICUResourceWriter.Resource res) {
284         if (!res.hasKey) {
285             return SPECIAL_NONE;
286         }
287 
288         if (res.name.equals("CollationElements") && res instanceof ICUResourceWriter.ResourceTable) {
289             return SPECIAL_COLLATIONELEMENTS;
290         }
291 
292         if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) {
293             return SPECIAL_COLLATIONS;
294         }
295 
296         if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) {
297             return SPECIAL_DEPENDENCY;
298         }
299 
300         if (res instanceof ICUResourceWriter.ResourceProcess) {
301             if (((ICUResourceWriter.ResourceProcess) res).ext.equals(ICUResourceWriter.TRANSLITERATOR)) {
302                 return SPECIAL_TRANSLITERATOR;
303             }
304         }
305 
306         return SPECIAL_NONE;
307     }
308 
309     /**
310      *
311      * Goes through the resource tree recursively and looks for a table named
312      * CollationElements, collations, dependency, or transliterator and adds the appropriate data
313      *
314      * @param top
315      *            The top of the Resource Tree
316      */
dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir)317     private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) {
318         // if it's a table
319         if (top instanceof ICUResourceWriter.ResourceTable) {
320             // loop through all it's elements and check if they're anything specialCollationElements or Collation
321             ICUResourceWriter.Resource cur = top.first;
322             while (cur != null) {
323                 switch (getSpecialType(cur)) {
324                 case SPECIAL_COLLATIONELEMENTS:
325                     addCollation(cur);
326                     break;
327                 case SPECIAL_COLLATIONS:
328                     addCollationElements(cur);
329                     break;
330                 case SPECIAL_DEPENDENCY:
331                     addDependency((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur,
332                         outDir);
333                     break;
334                 case SPECIAL_TRANSLITERATOR:
335                     addTransliteration((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur);
336                     break;
337                 case SPECIAL_NONE:
338                 default:
339                     dealWithSpecialElements(cur, outDir);
340                 }
341 
342                 cur = cur.next;
343             }
344         }
345         // if it's not a table...don't do anything...
346     }
347 
setRulesHash(Hashtable<String, String> hash)348     public static void setRulesHash(Hashtable<String, String> hash) {
349         ruleStringsHash = hash;
350     }
351 
352     // Parallels the C function for parseTransliterator in parse.c of genrb
addTransliteration(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans)353     private static void addTransliteration(ICUResourceWriter.ResourceTable parent,
354         ICUResourceWriter.ResourceProcess trans) {
355         if (ruleStringsHash == null) {
356             System.err.println("If you are processing transliteration, you must set the Rules Hashtable.");
357             System.exit(-1);
358         }
359 
360         String dataString = ruleStringsHash.get(trans.val);
361 
362         if (dataString == null) {
363             System.err.println("Could not find data for: " + trans.val);
364             System.exit(-1);
365         }
366 
367         // strip out the unneeded stuff from the buffer (like comments and spaces and line breaks
368         dataString = stripRules(dataString);
369 
370         // create a string resource containing the data and add it to the resource tree
371         // remove the ResourceProcess and add the String
372 
373         ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString("Resource", dataString);
374 
375         ICUResourceWriter.Resource current = parent.first;
376 
377         // yes, we're using an address comparison below...because they should both be pointing the the same object when
378         // we find it.
379         if (current != trans) {
380             while (current != null && current.next != trans) {
381                 current = current.next;
382             }
383             if (current != null) {
384                 replacement.next = trans.next;
385                 current.next = replacement;
386             } else {
387                 System.err.println("An unexpected error has occured: Could not find Transliteration resource.");
388                 System.exit(-1);
389             }
390         } else {
391             replacement.next = trans.next;
392             parent.first = replacement;
393         }
394 
395     }
396 
isUWhiteSpace(char c)397     private static boolean isUWhiteSpace(char c) {
398         return (c >= 0x0009 && c <= 0x2029 && (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
399             c == 0x200E || c == 0x200F || c >= 0x2028));
400     }
401 
isNewLine(char c)402     private static boolean isNewLine(char c) {
403         if (c == 0x000d || c == 0x000a) {
404             return true;
405         }
406         return false;
407     }
408 
isPunctuation(char c)409     private static boolean isPunctuation(char c) {
410         int x = UCharacter.getType(c);
411         switch (x) {
412         case ECharacterCategory.CONNECTOR_PUNCTUATION:
413         case ECharacterCategory.DASH_PUNCTUATION:
414         case ECharacterCategory.END_PUNCTUATION:
415         case ECharacterCategory.FINAL_PUNCTUATION:
416         case ECharacterCategory.INITIAL_PUNCTUATION:
417         case ECharacterCategory.OTHER_PUNCTUATION:
418         case ECharacterCategory.START_PUNCTUATION:
419             return true;
420         default:
421             return false;
422         }
423     }
424 
isControl(char c)425     private static boolean isControl(char c) {
426         int x = UCharacter.getType(c);
427         switch (x) {
428         case ECharacterCategory.CONTROL:
429             return true;
430         default:
431             return false;
432         }
433     }
434 
435     // parallels the C++ function utrans_stripRules in rbt_pars.cpp in i18n project
stripRules(String data)436     private static String stripRules(String data) {
437         String newData = "";
438         int currentIndex = 0;
439         char curChar;
440         char curChar2 = '0';
441         boolean needChar2 = false;
442         boolean quoted = false;
443 
444         try {
445 
446             while (currentIndex < data.length()) {
447                 needChar2 = false;
448                 curChar = data.charAt(currentIndex);
449                 // if it's a quote, set the flag
450                 if (curChar == '\'') {
451                     quoted = !quoted;
452                 }
453                 // otherwise...if the quote flag is NOT set.
454                 else if (!quoted) {
455                     // IF comment... ignore comment lines ...starting with #....and until a carriage return or line feed
456                     if (curChar == '#') {
457                         // if the preceeding characters were whitepace or new lines, go back and get rid of them
458 
459                         while (newData.length() > 0
460                             && (isNewLine(newData.charAt(newData.length() - 1)) || isUWhiteSpace(newData.charAt(newData
461                                 .length() - 1)))) {
462                             if (newData.length() == 1) {
463                                 newData = "";
464                             } else {
465                                 newData = newData.substring(0, newData.length() - 2);
466                             }
467 
468                         }
469 
470                         // move to the end of the line
471                         while (!isNewLine(curChar) && currentIndex < data.length()) {
472                             currentIndex++;
473                             if (currentIndex < data.length()) {
474                                 curChar = data.charAt(currentIndex);
475                             }
476                         }
477                         // grab the first character of this new line (no longer part of the comment
478                         currentIndex++;
479                         if (currentIndex < data.length()) {
480                             curChar = data.charAt(currentIndex);
481                         }
482 
483                     } else if (curChar == '\\') // OR if its an escape char //((UChar)0x005C) - \
484                     {
485                         // skip over the \ and then skip any line breaks that may follow
486                         do {
487                             currentIndex++;
488                             if (currentIndex < data.length()) {
489                                 curChar = data.charAt(currentIndex);
490                             }
491                         } while (isNewLine(curChar) && currentIndex < data.length());
492 
493                         // if it's a u and there are 4 more characters after it
494                         if (curChar == 'u' && (data.length() - currentIndex) >= 4) {
495                             // convert it to a character from a codepoint (String)UTF16.valueOf(int)
496 
497                             String hexString = data.substring(currentIndex + 1, currentIndex + 5);
498                             int codeNum = Integer.parseInt(hexString, 16);
499                             String temp = UTF16.valueOf(codeNum);
500                             char tempChar;
501 
502                             tempChar = temp.charAt(0);
503 
504                             // if its 0xFFFFFFFF
505                             if (tempChar == 0xFFFFFFFF) {
506                                 System.err.println("Invalid character found while processing file.");
507                                 System.exit(-1);
508                             }
509                             // if NOT whitespace(isUWhiteSpace) && NOT a control character? && not punctuation
510                             if (!isUWhiteSpace(tempChar) && !isPunctuation(tempChar) && !isControl(tempChar)) {
511                                 // set the current character to this character
512                                 curChar = tempChar;
513                                 currentIndex += 4; // the 4 numbers...will add one more for the u, already did one for
514                                 // the slash
515                                 if (temp.length() > 1) {
516                                     curChar2 = temp.charAt(1);
517                                     needChar2 = true;
518                                 }
519                             }
520 
521                         }
522 
523                     } else if (curChar == '\'')// OR if it's a quote
524                     {
525                         quoted = !quoted;
526                     }
527                 } // end not quoted
528 
529                 if (isNewLine(curChar)) {
530                     quoted = false;
531                     // while we're not hitting the end of the string
532                     while (currentIndex < data.length()) {
533                         if (!isNewLine(curChar)) {
534                             break;
535                         }
536                         currentIndex++;
537                         if (currentIndex < data.length()) {
538                             curChar = data.charAt(currentIndex);
539                         }
540                     }
541                     continue;
542                 }
543 
544                 // append the character to the new string, because we've decided it's ok
545                 newData += curChar;
546                 currentIndex++;
547                 if (needChar2) {
548                     newData += curChar2;
549                 }
550             } // end loop
551 
552         } catch (Exception e) {
553             System.err.println("Had a problem...");
554         }
555         if (newData.length() > data.length()) {
556             return null;
557         }
558         return newData;
559     }
560 
addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, String outDir)561     private static void addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep,
562         String outDir) {
563         String filename;
564         File f;
565 
566         filename = outDir;
567         if (!(outDir.charAt(outDir.length() - 1) == '/' || outDir.charAt(outDir.length() - 1) == '\\')) {
568             filename += "/";
569         }
570 
571         filename += dep.val;
572 
573         f = new File(filename);
574         if (!f.exists()) {
575             System.err.println("WARNING: Could not find dependancy: " + filename);
576         }
577         // create the %%DEPENDENCY array with a string containing the path, add it to the table.
578         ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray();
579         a.name = "%%DEPENDENCY";
580         ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val);
581         a.first = str;
582         dep.addAfter(a);
583 
584         // Remove the ResourceProcess object and replace it with a ResourceString object.
585         ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString(dep.name, dep.val);
586 
587         ICUResourceWriter.Resource current = parent.first;
588 
589         // yes, we're using an address comparison below...because they should both be pointing the the same object when
590         // we find it.
591         while (current != null && current.next != dep) {
592             current = current.next;
593         }
594         replacement.next = dep.next;
595         current.next = replacement;
596 
597     }
598 
addCollationElements(ICUResourceWriter.Resource elementTable)599     private static void addCollationElements(ICUResourceWriter.Resource elementTable) {
600         // Element table name is "Collation"
601         // loops through sub tables of Collation and adds CollationBinary as nessisary
602         ICUResourceWriter.Resource cur = elementTable.first;
603 
604         while (cur != null) {
605             addCollation(cur);
606             cur = cur.next;
607         }
608     }
609 
addCollation(ICUResourceWriter.Resource element)610     private static void addCollation(ICUResourceWriter.Resource element) {
611         ICUResourceWriter.Resource cur = element.first;
612 
613         while (cur != null) {
614             if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) {
615                 ICUResourceWriter.ResourceString strElement = (ICUResourceWriter.ResourceString) cur;
616 
617                 if (strElement.name.equals("Sequence")) {
618                     try {
619                         // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val);
620                         // TODO Generate proper binary data for Collator
621                         /*
622                          * currently CollatorWriter does not work properly
623                          * Need to write something to generate proper bytes,
624                          * bytes do not seem to exist at this time
625                          * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower
626                          * subdirectory of icu4j in the IBM local cvs
627                          */
628                         // byte[] bytes = CollatorWriter.writeRBC(rbc);
629                         // ICUResourceWriter.ResourceBinary b = new ICUResourceWriter.ResourceBinary();
630                         // b.data = bytes;
631                         // b.name = "%%CollationBin";
632                         // element.addAfter(b);
633 
634                     } catch (Exception e) {
635                         System.err.println("Could not create Collation Binary");
636                     }
637                 }
638             }
639             cur = cur.next;
640         }
641     }
642 
643     /**
644      * Write the header section of the file. This section of the file currently contains:<br>
645      * -A 2 byte number containing the length (in bytes) of the header.<br>
646      * -Two "magic numbers" each 1 byte in size.<br>
647      * -The UDataInfo structure
648      * -The null terminated copyright string (if it should be written)
649      *
650      * @param out
651      * @param info
652      * @param copyright
653      */
writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright)654     private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) {
655         short headSize = 0;
656         byte[] magics = new byte[2];
657         int pad = 0;
658         byte[] padding;
659         /*
660          * The header includes a 2 byte number containing the size of the header,
661          * two magic numbers each 1 byte in size, the UDataInfo structure, and the
662          * copyright plus null terminator. Subject to change.
663          */
664         headSize += info.size + BYTES_TAKEN_BY_HEADER_SIZE + BYTES_TAKEN_BY_MAGIC1 + BYTES_TAKEN_BY_MAGIC2;
665         if (copyright != null && INCLUDE_COPYRIGHT) {
666             headSize += copyright.length() + 1;
667         }
668         if ((pad = pad16Bytes(headSize)) != 0) {
669             headSize += pad;
670         }
671 
672         magics[0] = MAGIC1;
673         magics[1] = MAGIC2;
674 
675         try {
676             // write the size of the header
677             out.write(shortToBytes(headSize));
678             written += (shortToBytes(headSize)).length;
679 
680             // write the two magic numbers
681             out.write(magics);
682             written += magics.length;
683 
684             // write the UDataInfo structure
685             out.write(info.getByteArray());
686             written += info.getByteArray().length;
687 
688             // write the copyright and null terminating byte(s) if writing it
689             if (copyright != null && INCLUDE_COPYRIGHT) {
690                 out.write((copyright + "\0").getBytes(CHARSET8));
691                 written += ((copyright + "\0").getBytes(CHARSET8)).length;
692 
693             }
694 
695             if (pad != 0) {
696                 padding = new byte[pad];
697                 for (int i = 0; i < padding.length; i++) {
698                     padding[i] = 0;
699                 }
700                 out.write(padding);
701                 written += padding.length;
702             }
703 
704         } catch (IOException e) {
705             printError(e.getMessage());
706             e.printStackTrace();
707             System.exit(1);
708         }
709     }
710 
711     /**
712      * Write some information about the key string and then write a chunk of bytes which mirrors the
713      * SRBRoot->fkeys character buffer. This will be a list of null
714      * terminated strings. Each string pertains to a certain resource. This method also modifies the resources in
715      * 'resTop' by setting the keyStringOffset variable. The keyStringOffset variable is the number of bytes from
716      * the start of the key string that the resources key starts. For example:
717      *
718      * <p>
719      * In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table contains a string resource
720      * with the key "1.31."
721      * </p>
722      * <p>
723      * If this were the whole of the locale data, the key string would be an encoded version of this:
724      * </p>
725      *
726      * "Version\01.31\0"
727      *
728      * <br>
729      * <br>
730      * In UTF-16 encoding, each character will take 2 bytes. <br>
731      * keyStringOffset for the table object would be 0. <br>
     * keyStringOffset for the string resource would be ("Version".length() + 1) * 2 = 16 (the + 1 is the null terminator)
733      *
734      *
735      * @param out
736      *            The output stream to write this to.
737      * @param resTop
738      *            The top of the resource tree whose keys shall be written
739      */
writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop)740     private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) {
741         String keyList = "";
742         byte[] padding = null;
743         int padBytes = 0;
744         int end;
745         int root;
746         byte[] rootBytes;
747         int[] indexes = new int[URES_INDEX_TOP];
748         byte[] indexBytes = new byte[URES_INDEX_TOP * 4];
749         byte[] keyBytes;
750         int usedOffset;
751         int sizeOfIndexes;
752         int sizeOfIndexesAndKeys;
753         int tableID;
754 
755         // set flag so that we know which resource is the top of the tree
756         resTop.isTop = true;
757 
758         sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT;
759 
760         usedOffset = sizeOfIndexes;
761 
762         // Build the String of keys
763         keyList = buildKeyList(keyList, resTop, usedOffset);
764 
765         sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length();
766         usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys);
767 
768         end = sizeOfIndexesAndKeys + resTop.sizeOfChildren;
769 
770         // if it is not 16 byte aligned
771         if ((padBytes = pad32(sizeOfIndexesAndKeys)) != 0) {
772             padding = createPadding(padBytes);
773             if (padding != null) {
774                 usedOffset += padding.length;
775                 end += padding.length;
776             }
777 
778         }
779 
780         // build a set of 32 bits (in C this variable is called 'root' in reslist.c)
781         // the number of bytes included in the keyList, keyList padding, all the children
782 
783         if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) {
784             tableID = (URES_TABLE32 << 28);
785         } else {
786             tableID = (URES_TABLE << 28);
787         }
788         root = (end >>> 2) | (tableID);
789 
790         rootBytes = intToBytes(root);
791 
792         end += resTop.size;
793 
794         end += pad32(end);
795 
796         indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP;
797         indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2;
798         indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2;
799         indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP];
800         indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength;
801 
802         indexBytes = intArrayToBytes(indexes);
803 
804         try {
805             // write the "root" object
806             out.write(rootBytes);
807             written += rootBytes.length;
808 
809             // write the indexes array
810             out.write(indexBytes);
811             written += indexBytes.length;
812 
813             // write the keyList and padding if nessicary
814             keyBytes = keyList.getBytes(CHARSET8);
815             out.write(keyBytes);
816             written += keyBytes.length;
817 
818             if (padding != null) {
819                 out.write(padding);
820                 written += padding.length;
821             }
822         } catch (IOException e) {
823             printError("Could not write key string to file. " + e.getMessage());
824             System.exit(1);
825         }
826 
827         return usedOffset;
828     }
829 
830     /**
831      * Recursively go through the whole tree and continue to add to the keyList. As this is done,
832      * set the keyStringOffset, numChildren, sizeOfChildren, and size variables.
833      *
834      * @param keyList
835      *            The current string of keys.
836      * @param resTop
837      *            The resource whose keys shall be written to the keyList.
838      * @return
839      */
buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset)840     private static String buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset) {
841         ICUResourceWriter.Resource current = resTop.first;
842         int x = 0;
843 
844         // add this resources key to the list unless it is the top resource or doesn't have a key
845         if (!resTop.isTop && resTop.hasKey) {
846             // clean up quotes if any
847             if (resTop.name.indexOf("\"") >= 0) {
848                 resTop.name = removeQuotes(resTop.name);
849             }
850             // set the keyStringOffset
851             resTop.keyStringOffset = usedOffset + (keyList.length() * BYTES_PER_UTF8_CHAR);
852             keyList += (resTop.name + "\0");
853 
854         }
855 
856         // if it has children, call this method on them too
857         while (current != null) {
858             if (resTop instanceof ICUResourceWriter.ResourceArray
859                 || resTop instanceof ICUResourceWriter.ResourceIntVector) {
860                 current.hasKey = false;
861             }
862 
863             keyList = buildKeyList(keyList, current, usedOffset);
864             x++;
865 
866             // add the size of the current child to the parents sizeOfChildren
867 
868             current = current.next;
869         }
870 
871         // set the size of this object
872         resTop.setSize();
873 
874         resTop.numChildren = x;
875         return keyList;
876     }
877 
878     /**
879      * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is upper byte.
880      * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return:
881      * [0] = 0110 0000 or 0x60
882      * [1] = 0110 1101 or 0x6D
883      */
shortToBytes(short x)884     private static byte[] shortToBytes(short x) {
885         byte[] b = new byte[2];
886         b[1] = (byte) (x); // bitwise AND with the lower byte
887         b[0] = (byte) (x >>> 8); // shift four bits to the right and fill with zeros, and then bitwise and with the
888         // lower byte
889         return b;
890     }
891 
892     /**
893      * Takes a 32 bit integer and returns an array of 4 bytes.
894      *
895      */
intToBytes(int x)896     private static byte[] intToBytes(int x) {
897         byte[] b = new byte[4];
898         b[3] = (byte) (x); // just the last byte
899 
900         x = x >>> 8; // shift each byte over one spot.
901         b[2] = (byte) (x); // just the last byte
902 
903         x = x >>> 8; // shift each byte over one spot.
904         b[1] = (byte) (x); // just the last byte
905 
906         x = x >>> 8; // shift each byte over one spot.
907         b[0] = (byte) (x); // just the last byte
908 
909         return b;
910     }
911 
912     /**
913      * Takes an array of integers and returns a byte array of the memory representation.
914      *
915      * @param x
916      * @return
917      */
intArrayToBytes(int[] x)918     private static byte[] intArrayToBytes(int[] x) {
919         byte[] b = new byte[x.length * 4];
920         byte[] temp;
921         int i, z;
922 
923         for (i = 0; i < x.length; i++) {
924             temp = intToBytes(x[i]);
925             for (z = 0; z < 4; z++) {
926                 b[(i * 4) + z] = temp[z];
927             }
928         }
929         return b;
930     }
931 
932     /**
933      * calculate the padding to make things align with 32 bits (aka 4 bytes)
934      *
935      * @param x
936      * @return
937      */
pad32(int x)938     private static int pad32(int x) {
939         return ((x % 4) == 0) ? 0 : (4 - (x % 4));
940     }
941 
pad16Bytes(int x)942     private static int pad16Bytes(int x) {
943         return ((x % 16) == 0) ? 0 : (16 - (x % 16));
944     }
945 
946     /**
947      * for printing errors.
948      */
printError(String message)949     private static void printError(String message) {
950 
951         System.err.println("LDML2ICUBinaryWriter : ERROR : " + message);
952     }
953 
createPadding(int length)954     private static byte[] createPadding(int length) {
955         byte x = (byte) 0x00;
956         byte[] b = new byte[length];
957         if (length == 0) {
958             return null;
959         }
960         for (int z = 0; z < b.length; z++) {
961             b[z] = x;
962         }
963 
964         return b;
965     }
966 
removeQuotes(String s)967     public static String removeQuotes(String s) {
968         String temp = s;
969         String temp2;
970         int x;
971         while (temp.indexOf("\"") >= 0) {
972             x = temp.indexOf("\"");
973             temp2 = temp.substring(0, x);
974             temp2 += temp.substring(x + 1, temp.length());
975             temp = temp2;
976         }
977 
978         return temp;
979     }
980 
981 }