• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2003-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 
8 package org.unicode.cldr.icu;
9 
10 import java.io.File;
11 import java.io.FileNotFoundException;
12 import java.io.FileOutputStream;
13 import java.io.IOException;
14 import java.util.Hashtable;
15 
16 import org.unicode.cldr.util.CldrUtility;
17 
18 import com.ibm.icu.lang.UCharacter;
19 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
20 import com.ibm.icu.text.UTF16;
21 
22 /**
23  * The LDML2ICUBinaryWriter class is a set of methods which can be used
24  * to generate Binary (.res) files in the ICU Binary format.
25  *
26  * @author Brian Rower - June 2008
27  *
28  */
29 public class LDML2ICUBinaryWriter {
30     /**
31      * This string is the copyright to be written into the file.
32      * In the C version, can be found in <I>icu4c_root</I>/source/common/unicode/uversion.h
      * It is passed to writeBinaryHeader, which only emits it when INCLUDE_COPYRIGHT is true.
33      */
34     private static final String COPYRIGHT = CldrUtility.getCopyrightString();
35 
    /**
     * Running count of bytes written to the output stream. The visible writer methods only ever
     * add to it (header, padding); nothing in the visible code resets it between files.
     */
36     public static int written = 0;
37 
38     /**
39      * The two magic bytes that writeBinaryHeader emits directly after the 2-byte header size.
40      */
41     private static final byte MAGIC1 = (byte) 0xda;
42     private static final byte MAGIC2 = 0x27;
43 
    /**
     * When false (the current value), writeBinaryHeader neither counts nor writes the copyright string.
     */
44     private static boolean INCLUDE_COPYRIGHT = false;
45     /**
46      * The number of bytes it takes to write magic number 1.
47      */
48     private static final short BYTES_TAKEN_BY_MAGIC1 = 1;
49 
50     /**
51      * The number of bytes it takes to write magic number 2.
52      */
53     private static final short BYTES_TAKEN_BY_MAGIC2 = 1;
54 
55     /**
56      * The number of bytes that it takes to write the size of the header.
57      */
58     private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2;
59 
60     /**
61      * The charset names to be used when encoding strings. CHARSET8 is the one used for the
      * copyright string in writeBinaryHeader.
62      */
63     public static final String CHARSET8 = "UTF-8";
64     public static final String CHARSET16 = "UTF-16BE";
65 
66     /**
67      * The number of bytes that each character takes up. This is dependent on the encoding (see CHARSET8 above).
      * NOTE(review): one byte per character only holds for ASCII text in UTF-8 - confirm the callers
      * only use this for ASCII keys.
68      */
69     private static final int BYTES_PER_UTF8_CHAR = 1;
70 
71     /**
72      * Numeric constants for special elements, as returned by getSpecialType and dispatched on
      * in dealWithSpecialElements.
73      */
74     private static final int SPECIAL_NONE = 0;
75     private static final int SPECIAL_COLLATIONS = 1;
76     private static final int SPECIAL_COLLATIONELEMENTS = 2;
77     private static final int SPECIAL_DEPENDENCY = 3;
78     private static final int SPECIAL_TRANSLITERATOR = 4;
79 
80     /**
81      * Numeric constants for types of resource items. This comment covers the whole group of
      * URES_* type constants that follows; it is not attached to a single declaration.
82      *
83      * @see ures_getType
84      * @stable ICU 2.0
85      */
86 
87     // **************************** ENUM Below is ported from C. See ures.h ***********************
88 
89     /** Resource type constant for "no resource". @stable ICU 2.6 */
90     public static final int URES_NONE = -1;
91 
92     /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
93     public static final int URES_STRING = 0;
94 
95     /** Resource type constant for binary data. @stable ICU 2.6 */
96     public static final int URES_BINARY = 1;
97 
98     /**
      * Resource type constant for tables of key-value pairs. @stable ICU 2.6
      * writeKeyString shifts this value (or URES_TABLE32) into the top four bits of the root word.
      */
99     public static final int URES_TABLE = 2;
100 
101     /**
102      * Resource type constant for aliases;
103      * internally stores a string which identifies the actual resource
104      * storing the data (can be in a different resource bundle).
105      * Resolved internally before delivering the actual resource through the API.
106      *
107      * @stable ICU 2.6
108      */
109     public static final int URES_ALIAS = 3;
110 
111     /**
112      * Internal use only.
113      * Alternative resource type constant for tables of key-value pairs.
114      * Never returned by ures_getType().
      * writeKeyString uses it instead of URES_TABLE when the top table reports is32Bit().
115      *
116      * @internal
117      */
118     public static final int URES_TABLE32 = 4;
119 
120     /**
121      * Resource type constant for a single 28-bit integer, interpreted as
122      * signed or unsigned by the ures_getInt() or ures_getUInt() function.
123      *
124      * @see ures_getInt
125      * @see ures_getUInt
126      * @stable ICU 2.6
127      */
128     public static final int URES_INT = 7;
129 
130     /** Resource type constant for arrays of resources. @stable ICU 2.6 */
131     public static final int URES_ARRAY = 8;
132 
133     /**
134      * Resource type constant for vectors of 32-bit integers.
135      *
136      * @see ures_getIntVector
137      * @stable ICU 2.6
138      */
139     public static final int URES_INT_VECTOR = 14;
140 
    /**
     * Last entry of the ported enum.
     * NOTE(review): presumably the exclusive upper bound of the type values, as in ures.h - confirm.
     */
141     public static final int URES_LIMIT = 16;
142 
143     /*
144      * The enum below is ported from C. See uresdata.h
145      *
146      * The values are positions in the int[] "indexes" array that writeKeyString fills in
      * (the array is allocated with URES_INDEX_TOP entries).
147      */
148     /* [0] contains URES_INDEX_TOP==the length of indexes[] */
149     private static final int URES_INDEX_LENGTH = 0;
150     /* [1] contains the top of the strings, same as the bottom of resources, rounded up */
151     private static final int URES_INDEX_STRINGS_TOP = 1;
152     /* [2] contains the top of all resources */
153     private static final int URES_INDEX_RESOURCES_TOP = 2;
154     /* [3] contains the top of the bundle, in case it were ever different from [2] */
155     private static final int URES_INDEX_BUNDLE_TOP = 3;
156     /* [4] max. length of any table */
157     private static final int URES_INDEX_MAX_TABLE_LENGTH = 4;
158     /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2); declaration is disabled */
159     // private static final int URES_INDEX_ATTRIBUTES = 5;
160     /* This one is the length of the array, not a position in it */
161     private static final int URES_INDEX_TOP = 6;
162 
163     // Must be set (via setRulesHash) before writing transliteration data: addTransliteration looks up
    // the rule text in this table using the ResourceProcess value as the key, and exits if it is null.
164     private static Hashtable<String, String> ruleStringsHash = null;
165 
main()166     public static void main() {
167 
168     }
169 
170     /**
171      * This method is called upon the top of an ICUResourceWriter.Resource
172      * in order to write the whole Resource tree into binary format.
173      *
174      * @param resTop
175      *            The top of the resource tree that you would like written to file. This
176      *            object should be a ICUResourceWriter.ResourceTable.
177      * @param outDir
178      *            A string pointing to the path of the output directory.
179      * @param outFile
180      *            The name of the output file. If filename has an extension other than .res
181      *            (ex: .txt) this method will strip that extention and replace with .res.
182      */
writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile)183     public static void writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile) {
184         String fileName = "";
185         int usedOffset = 0;
186         String directoryPath = "";
187         FileOutputStream out;
188         UDataInfo info;
189         byte[] dataFormat;
190         byte[] formatVersion;
191         byte[] dataVersion;
192         byte[] padding;
193 
194         // Do some checks on the file name
195         // if it has a period in it...get rid of everything after the period
196         if (outFile.indexOf('.') > -1) {
197             fileName = outFile.substring(0, outFile.indexOf('.'));
198             if (fileName.length() == 0) {
199                 printError(outFile + " is not a valid file name.");
200                 System.exit(1);
201             }
202             fileName = fileName + ".res";
203         } else {
204             fileName = outFile + ".res";
205         }
206         // add the .res part to the file name
207 
208         // do some checks on the directory path
209         // replace all backslashes with forward slashes
210         directoryPath = outDir.replace('\\', '/');
211 
212         // if the path does not end in a slash, then we'll add one
213         if (directoryPath.charAt(directoryPath.length() - 1) != '/') {
214             directoryPath = directoryPath + "/";
215         }
216 
217         // create UDataInfo
218         // Data format is "ResB"
219         dataFormat = new byte[4];
220         dataFormat[0] = 0x52; // R
221         dataFormat[1] = 0x65; // e
222         dataFormat[2] = 0x73; // s
223         dataFormat[3] = 0x42; // B
224 
225         // Format version is 1.2.0.0
226         formatVersion = new byte[4];
227         formatVersion[0] = 1;
228         formatVersion[1] = 2;
229         formatVersion[2] = 0;
230         formatVersion[3] = 0;
231 
232         // data version is 1.4.0.0
233         dataVersion = new byte[4];
234         dataVersion[0] = 1;
235         dataVersion[1] = 4;
236         dataVersion[2] = 0;
237         dataVersion[3] = 0;
238 
239         // now that the file and directory name are formatted, lets try to create an output stream
240         try {
241             System.out.println("Creating file: " + directoryPath + fileName);
242             File f = new File(directoryPath, fileName);
243             out = new FileOutputStream(f);
244 
245             info = new UDataInfo(UDataInfo.getSize(), (short) 0, UDataInfo.BIGENDIAN, UDataInfo.ASCII_FAMILY,
246                 UDataInfo.SIZE_OF_UCHAR, (byte) 0, dataFormat, formatVersion, dataVersion);
247 
248             // this method goes through the tree and looks for a table named CollationElements or Collations, and adds
249             // the
250             // appropriate data to the tree
251             dealWithSpecialElements(resTop, outDir);
252 
253             // before we do anything with the resources, sort them
254             resTop.sort();
255 
256             // call writeBinaryHeader.
257             writeBinaryHeader(out, info, COPYRIGHT);
258 
259             usedOffset = writeKeyString(out, resTop);
260 
261             // Call writeBinary on the top of the Resource tree
262 
263             usedOffset = resTop.writeBinary(out, usedOffset);
264             padding = createPadding(pad32(usedOffset));
265             if (padding != null) {
266                 out.write(padding);
267                 written += padding.length;
268             }
269             out.close();
270             System.out.println("Finished writing binary.");
271         } catch (FileNotFoundException e) {
272             printError(directoryPath + fileName + " could not be opened, please ensure the correct path is given.");
273             e.printStackTrace();
274             System.exit(1);
275         } catch (SecurityException e) {
276             printError("access denied: " + directoryPath + fileName);
277             e.printStackTrace();
278             System.exit(1);
279         } catch (Exception e) {
280             printError(e.getMessage());
281             System.exit(1);
282         }
283     }
284 
getSpecialType(ICUResourceWriter.Resource res)285     private static int getSpecialType(ICUResourceWriter.Resource res) {
286         if (!res.hasKey) {
287             return SPECIAL_NONE;
288         }
289 
290         if (res.name.equals("CollationElements") && res instanceof ICUResourceWriter.ResourceTable) {
291             return SPECIAL_COLLATIONELEMENTS;
292         }
293 
294         if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) {
295             return SPECIAL_COLLATIONS;
296         }
297 
298         if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) {
299             return SPECIAL_DEPENDENCY;
300         }
301 
302         if (res instanceof ICUResourceWriter.ResourceProcess) {
303             if (((ICUResourceWriter.ResourceProcess) res).ext.equals(ICUResourceWriter.TRANSLITERATOR)) {
304                 return SPECIAL_TRANSLITERATOR;
305             }
306         }
307 
308         return SPECIAL_NONE;
309     }
310 
311     /**
312      *
313      * Goes through the resource tree recursively and looks for a table named
314      * CollationElements, collations, dependency, or transliterator and adds the appropriate data
315      *
316      * @param top
317      *            The top of the Resource Tree
318      */
dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir)319     private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) {
320         // if it's a table
321         if (top instanceof ICUResourceWriter.ResourceTable) {
322             // loop through all it's elements and check if they're anything specialCollationElements or Collation
323             ICUResourceWriter.Resource cur = top.first;
324             while (cur != null) {
325                 switch (getSpecialType(cur)) {
326                 case SPECIAL_COLLATIONELEMENTS:
327                     addCollation(cur);
328                     break;
329                 case SPECIAL_COLLATIONS:
330                     addCollationElements(cur);
331                     break;
332                 case SPECIAL_DEPENDENCY:
333                     addDependency((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur,
334                         outDir);
335                     break;
336                 case SPECIAL_TRANSLITERATOR:
337                     addTransliteration((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur);
338                     break;
339                 case SPECIAL_NONE:
340                 default:
341                     dealWithSpecialElements(cur, outDir);
342                 }
343 
344                 cur = cur.next;
345             }
346         }
347         // if it's not a table...don't do anything...
348     }
349 
setRulesHash(Hashtable<String, String> hash)350     public static void setRulesHash(Hashtable<String, String> hash) {
351         ruleStringsHash = hash;
352     }
353 
354     // Parallels the C function for parseTransliterator in parse.c of genrb
addTransliteration(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans)355     private static void addTransliteration(ICUResourceWriter.ResourceTable parent,
356         ICUResourceWriter.ResourceProcess trans) {
357         if (ruleStringsHash == null) {
358             System.err.println("If you are processing transliteration, you must set the Rules Hashtable.");
359             System.exit(-1);
360         }
361 
362         String dataString = ruleStringsHash.get(trans.val);
363 
364         if (dataString == null) {
365             System.err.println("Could not find data for: " + trans.val);
366             System.exit(-1);
367         }
368 
369         // strip out the unneeded stuff from the buffer (like comments and spaces and line breaks
370         dataString = stripRules(dataString);
371 
372         // create a string resource containing the data and add it to the resource tree
373         // remove the ResourceProcess and add the String
374 
375         ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString("Resource", dataString);
376 
377         ICUResourceWriter.Resource current = parent.first;
378 
379         // yes, we're using an address comparison below...because they should both be pointing the the same object when
380         // we find it.
381         if (current != trans) {
382             while (current != null && current.next != trans) {
383                 current = current.next;
384             }
385             if (current != null) {
386                 replacement.next = trans.next;
387                 current.next = replacement;
388             } else {
389                 System.err.println("An unexpected error has occured: Could not find Transliteration resource.");
390                 System.exit(-1);
391             }
392         } else {
393             replacement.next = trans.next;
394             parent.first = replacement;
395         }
396 
397     }
398 
isUWhiteSpace(char c)399     private static boolean isUWhiteSpace(char c) {
400         return (c >= 0x0009 && c <= 0x2029 && (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
401             c == 0x200E || c == 0x200F || c >= 0x2028));
402     }
403 
isNewLine(char c)404     private static boolean isNewLine(char c) {
405         if (c == 0x000d || c == 0x000a) {
406             return true;
407         }
408         return false;
409     }
410 
isPunctuation(char c)411     private static boolean isPunctuation(char c) {
412         int x = UCharacter.getType(c);
413         switch (x) {
414         case ECharacterCategory.CONNECTOR_PUNCTUATION:
415         case ECharacterCategory.DASH_PUNCTUATION:
416         case ECharacterCategory.END_PUNCTUATION:
417         case ECharacterCategory.FINAL_PUNCTUATION:
418         case ECharacterCategory.INITIAL_PUNCTUATION:
419         case ECharacterCategory.OTHER_PUNCTUATION:
420         case ECharacterCategory.START_PUNCTUATION:
421             return true;
422         default:
423             return false;
424         }
425     }
426 
isControl(char c)427     private static boolean isControl(char c) {
428         int x = UCharacter.getType(c);
429         switch (x) {
430         case ECharacterCategory.CONTROL:
431             return true;
432         default:
433             return false;
434         }
435     }
436 
437     // parallels the C++ function utrans_stripRules in rbt_pars.cpp in i18n project
stripRules(String data)438     private static String stripRules(String data) {
439         String newData = "";
440         int currentIndex = 0;
441         char curChar;
442         char curChar2 = '0';
443         boolean needChar2 = false;
444         boolean quoted = false;
445 
446         try {
447 
448             while (currentIndex < data.length()) {
449                 needChar2 = false;
450                 curChar = data.charAt(currentIndex);
451                 // if it's a quote, set the flag
452                 if (curChar == '\'') {
453                     quoted = !quoted;
454                 }
455                 // otherwise...if the quote flag is NOT set.
456                 else if (!quoted) {
457                     // IF comment... ignore comment lines ...starting with #....and until a carriage return or line feed
458                     if (curChar == '#') {
459                         // if the preceeding characters were whitepace or new lines, go back and get rid of them
460 
461                         while (newData.length() > 0
462                             && (isNewLine(newData.charAt(newData.length() - 1)) || isUWhiteSpace(newData.charAt(newData
463                                 .length() - 1)))) {
464                             if (newData.length() == 1) {
465                                 newData = "";
466                             } else {
467                                 newData = newData.substring(0, newData.length() - 2);
468                             }
469 
470                         }
471 
472                         // move to the end of the line
473                         while (!isNewLine(curChar) && currentIndex < data.length()) {
474                             currentIndex++;
475                             if (currentIndex < data.length()) {
476                                 curChar = data.charAt(currentIndex);
477                             }
478                         }
479                         // grab the first character of this new line (no longer part of the comment
480                         currentIndex++;
481                         if (currentIndex < data.length()) {
482                             curChar = data.charAt(currentIndex);
483                         }
484 
485                     } else if (curChar == '\\') // OR if its an escape char //((UChar)0x005C) - \
486                     {
487                         // skip over the \ and then skip any line breaks that may follow
488                         do {
489                             currentIndex++;
490                             if (currentIndex < data.length()) {
491                                 curChar = data.charAt(currentIndex);
492                             }
493                         } while (isNewLine(curChar) && currentIndex < data.length());
494 
495                         // if it's a u and there are 4 more characters after it
496                         if (curChar == 'u' && (data.length() - currentIndex) >= 4) {
497                             // convert it to a character from a codepoint (String)UTF16.valueOf(int)
498 
499                             String hexString = data.substring(currentIndex + 1, currentIndex + 5);
500                             int codeNum = Integer.parseInt(hexString, 16);
501                             String temp = UTF16.valueOf(codeNum);
502                             char tempChar;
503 
504                             tempChar = temp.charAt(0);
505 
506                             // if its 0xFFFFFFFF
507                             if (tempChar == 0xFFFFFFFF) {
508                                 System.err.println("Invalid character found while processing file.");
509                                 System.exit(-1);
510                             }
511                             // if NOT whitespace(isUWhiteSpace) && NOT a control character? && not punctuation
512                             if (!isUWhiteSpace(tempChar) && !isPunctuation(tempChar) && !isControl(tempChar)) {
513                                 // set the current character to this character
514                                 curChar = tempChar;
515                                 currentIndex += 4; // the 4 numbers...will add one more for the u, already did one for
516                                 // the slash
517                                 if (temp.length() > 1) {
518                                     curChar2 = temp.charAt(1);
519                                     needChar2 = true;
520                                 }
521                             }
522 
523                         }
524 
525                     } else if (curChar == '\'')// OR if it's a quote
526                     {
527                         quoted = !quoted;
528                     }
529                 } // end not quoted
530 
531                 if (isNewLine(curChar)) {
532                     quoted = false;
533                     // while we're not hitting the end of the string
534                     while (currentIndex < data.length()) {
535                         if (!isNewLine(curChar)) {
536                             break;
537                         }
538                         currentIndex++;
539                         if (currentIndex < data.length()) {
540                             curChar = data.charAt(currentIndex);
541                         }
542                     }
543                     continue;
544                 }
545 
546                 // append the character to the new string, because we've decided it's ok
547                 newData += curChar;
548                 currentIndex++;
549                 if (needChar2) {
550                     newData += curChar2;
551                 }
552             } // end loop
553 
554         } catch (Exception e) {
555             System.err.println("Had a problem...");
556         }
557         if (newData.length() > data.length()) {
558             return null;
559         }
560         return newData;
561     }
562 
addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, String outDir)563     private static void addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep,
564         String outDir) {
565         String filename;
566         File f;
567 
568         filename = outDir;
569         if (!(outDir.charAt(outDir.length() - 1) == '/' || outDir.charAt(outDir.length() - 1) == '\\')) {
570             filename += "/";
571         }
572 
573         filename += dep.val;
574 
575         f = new File(filename);
576         if (!f.exists()) {
577             System.err.println("WARNING: Could not find dependancy: " + filename);
578         }
579         // create the %%DEPENDENCY array with a string containing the path, add it to the table.
580         ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray();
581         a.name = "%%DEPENDENCY";
582         ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val);
583         a.first = str;
584         dep.addAfter(a);
585 
586         // Remove the ResourceProcess object and replace it with a ResourceString object.
587         ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString(dep.name, dep.val);
588 
589         ICUResourceWriter.Resource current = parent.first;
590 
591         // yes, we're using an address comparison below...because they should both be pointing the the same object when
592         // we find it.
593         while (current != null && current.next != dep) {
594             current = current.next;
595         }
596         replacement.next = dep.next;
597         current.next = replacement;
598 
599     }
600 
addCollationElements(ICUResourceWriter.Resource elementTable)601     private static void addCollationElements(ICUResourceWriter.Resource elementTable) {
602         // Element table name is "Collation"
603         // loops through sub tables of Collation and adds CollationBinary as nessisary
604         ICUResourceWriter.Resource cur = elementTable.first;
605 
606         while (cur != null) {
607             addCollation(cur);
608             cur = cur.next;
609         }
610     }
611 
addCollation(ICUResourceWriter.Resource element)612     private static void addCollation(ICUResourceWriter.Resource element) {
613         ICUResourceWriter.Resource cur = element.first;
614 
615         while (cur != null) {
616             if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) {
617                 ICUResourceWriter.ResourceString strElement = (ICUResourceWriter.ResourceString) cur;
618 
619                 if (strElement.name.equals("Sequence")) {
620                     try {
621                         // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val);
622                         // TODO Generate proper binary data for Collator
623                         /*
624                          * currently CollatorWriter does not work properly
625                          * Need to write something to generate proper bytes,
626                          * bytes do not seem to exist at this time
627                          * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower
628                          * subdirectory of icu4j in the IBM local cvs
629                          */
630                         // byte[] bytes = CollatorWriter.writeRBC(rbc);
631                         // ICUResourceWriter.ResourceBinary b = new ICUResourceWriter.ResourceBinary();
632                         // b.data = bytes;
633                         // b.name = "%%CollationBin";
634                         // element.addAfter(b);
635 
636                     } catch (Exception e) {
637                         System.err.println("Could not create Collation Binary");
638                     }
639                 }
640             }
641             cur = cur.next;
642         }
643     }
644 
645     /**
646      * Write the header section of the file. This section of the file currently contains:<br>
647      * -A 2 byte number containing the length (in bytes) of the header.<br>
648      * -Two "magic numbers" each 1 byte in size.<br>
649      * -The UDataInfo structure
650      * -The null terminated copyright string (if it should be written)
651      *
652      * @param out
653      * @param info
654      * @param copyright
655      */
writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright)656     private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) {
657         short headSize = 0;
658         byte[] magics = new byte[2];
659         int pad = 0;
660         byte[] padding;
661         /*
662          * The header includes a 2 byte number containing the size of the header,
663          * two magic numbers each 1 byte in size, the UDataInfo structure, and the
664          * copyright plus null terminator. Subject to change.
665          */
666         headSize += info.size + BYTES_TAKEN_BY_HEADER_SIZE + BYTES_TAKEN_BY_MAGIC1 + BYTES_TAKEN_BY_MAGIC2;
667         if (copyright != null && INCLUDE_COPYRIGHT) {
668             headSize += copyright.length() + 1;
669         }
670         if ((pad = pad16Bytes(headSize)) != 0) {
671             headSize += pad;
672         }
673 
674         magics[0] = MAGIC1;
675         magics[1] = MAGIC2;
676 
677         try {
678             // write the size of the header
679             out.write(shortToBytes(headSize));
680             written += (shortToBytes(headSize)).length;
681 
682             // write the two magic numbers
683             out.write(magics);
684             written += magics.length;
685 
686             // write the UDataInfo structure
687             out.write(info.getByteArray());
688             written += info.getByteArray().length;
689 
690             // write the copyright and null terminating byte(s) if writing it
691             if (copyright != null && INCLUDE_COPYRIGHT) {
692                 out.write((copyright + "\0").getBytes(CHARSET8));
693                 written += ((copyright + "\0").getBytes(CHARSET8)).length;
694 
695             }
696 
697             if (pad != 0) {
698                 padding = new byte[pad];
699                 for (int i = 0; i < padding.length; i++) {
700                     padding[i] = 0;
701                 }
702                 out.write(padding);
703                 written += padding.length;
704             }
705 
706         } catch (IOException e) {
707             printError(e.getMessage());
708             e.printStackTrace();
709             System.exit(1);
710         }
711     }
712 
713     /**
714      * Write some information about the key string and then write a chunk of bytes which mirrors the
715      * SRBRoot->fkeys character buffer. This will be a list of null
716      * terminated strings. Each string pertains to a certain resource. This method also modifies the resources in
717      * 'resTop' by setting the keyStringOffset variable. The keyStringOffset variable is the number of bytes from
718      * the start of the key string that the resources key starts. For example:
719      *
720      * <p>
721      * In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table contains a string resource
722      * with the key "1.31."
723      * </p>
724      * <p>
725      * If this were the whole of the locale data, the key string would be an encoded version of this:
726      * </p>
727      *
728      * "Version\01.31\0"
729      *
730      * <br>
731      * <br>
732      * In UTF-16 encoding, each character will take 2 bytes. <br>
733      * keyStringOffset for the table object would be 0. <br>
734      * keyStringOffset for the string resource would be = "Version".length() + 2 = 16
735      *
736      *
737      * @param out
738      *            The output stream to write this to.
739      * @param resTop
740      *            The top of the resource tree whose keys shall be written
741      */
writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop)742     private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) {
743         String keyList = "";
744         byte[] padding = null;
745         int padBytes = 0;
746         int end;
747         int root;
748         byte[] rootBytes;
749         int[] indexes = new int[URES_INDEX_TOP];
750         byte[] indexBytes = new byte[URES_INDEX_TOP * 4];
751         byte[] keyBytes;
752         int usedOffset;
753         int sizeOfIndexes;
754         int sizeOfIndexesAndKeys;
755         int tableID;
756 
757         // set flag so that we know which resource is the top of the tree
758         resTop.isTop = true;
759 
760         sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT;
761 
762         usedOffset = sizeOfIndexes;
763 
764         // Build the String of keys
765         keyList = buildKeyList(keyList, resTop, usedOffset);
766 
767         sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length();
768         usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys);
769 
770         end = sizeOfIndexesAndKeys + resTop.sizeOfChildren;
771 
772         // if it is not 16 byte aligned
773         if ((padBytes = pad32(sizeOfIndexesAndKeys)) != 0) {
774             padding = createPadding(padBytes);
775             if (padding != null) {
776                 usedOffset += padding.length;
777                 end += padding.length;
778             }
779 
780         }
781 
782         // build a set of 32 bits (in C this variable is called 'root' in reslist.c)
783         // the number of bytes included in the keyList, keyList padding, all the children
784 
785         if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) {
786             tableID = (URES_TABLE32 << 28);
787         } else {
788             tableID = (URES_TABLE << 28);
789         }
790         root = (end >>> 2) | (tableID);
791 
792         rootBytes = intToBytes(root);
793 
794         end += resTop.size;
795 
796         end += pad32(end);
797 
798         indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP;
799         indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2;
800         indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2;
801         indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP];
802         indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength;
803 
804         indexBytes = intArrayToBytes(indexes);
805 
806         try {
807             // write the "root" object
808             out.write(rootBytes);
809             written += rootBytes.length;
810 
811             // write the indexes array
812             out.write(indexBytes);
813             written += indexBytes.length;
814 
815             // write the keyList and padding if nessicary
816             keyBytes = keyList.getBytes(CHARSET8);
817             out.write(keyBytes);
818             written += keyBytes.length;
819 
820             if (padding != null) {
821                 out.write(padding);
822                 written += padding.length;
823             }
824         } catch (IOException e) {
825             printError("Could not write key string to file. " + e.getMessage());
826             System.exit(1);
827         }
828 
829         return usedOffset;
830     }
831 
832     /**
833      * Recursively go through the whole tree and continue to add to the keyList. As this is done,
834      * set the keyStringOffset, numChildren, sizeOfChildren, and size variables.
835      *
836      * @param keyList
837      *            The current string of keys.
838      * @param resTop
839      *            The resource whose keys shall be written to the keyList.
840      * @return
841      */
buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset)842     private static String buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset) {
843         ICUResourceWriter.Resource current = resTop.first;
844         int x = 0;
845 
846         // add this resources key to the list unless it is the top resource or doesn't have a key
847         if (!resTop.isTop && resTop.hasKey) {
848             // clean up quotes if any
849             if (resTop.name.indexOf("\"") >= 0) {
850                 resTop.name = removeQuotes(resTop.name);
851             }
852             // set the keyStringOffset
853             resTop.keyStringOffset = usedOffset + (keyList.length() * BYTES_PER_UTF8_CHAR);
854             keyList += (resTop.name + "\0");
855 
856         }
857 
858         // if it has children, call this method on them too
859         while (current != null) {
860             if (resTop instanceof ICUResourceWriter.ResourceArray
861                 || resTop instanceof ICUResourceWriter.ResourceIntVector) {
862                 current.hasKey = false;
863             }
864 
865             keyList = buildKeyList(keyList, current, usedOffset);
866             x++;
867 
868             // add the size of the current child to the parents sizeOfChildren
869 
870             current = current.next;
871         }
872 
873         // set the size of this object
874         resTop.setSize();
875 
876         resTop.numChildren = x;
877         return keyList;
878     }
879 
880     /**
881      * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is upper byte.
882      * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return:
883      * [0] = 0110 0000 or 0x60
884      * [1] = 0110 1101 or 0x6D
885      */
shortToBytes(short x)886     private static byte[] shortToBytes(short x) {
887         byte[] b = new byte[2];
888         b[1] = (byte) (x); // bitwise AND with the lower byte
889         b[0] = (byte) (x >>> 8); // shift four bits to the right and fill with zeros, and then bitwise and with the
890         // lower byte
891         return b;
892     }
893 
894     /**
895      * Takes a 32 bit integer and returns an array of 4 bytes.
896      *
897      */
intToBytes(int x)898     private static byte[] intToBytes(int x) {
899         byte[] b = new byte[4];
900         b[3] = (byte) (x); // just the last byte
901 
902         x = x >>> 8; // shift each byte over one spot.
903         b[2] = (byte) (x); // just the last byte
904 
905         x = x >>> 8; // shift each byte over one spot.
906         b[1] = (byte) (x); // just the last byte
907 
908         x = x >>> 8; // shift each byte over one spot.
909         b[0] = (byte) (x); // just the last byte
910 
911         return b;
912     }
913 
914     /**
915      * Takes an array of integers and returns a byte array of the memory representation.
916      *
917      * @param x
918      * @return
919      */
intArrayToBytes(int[] x)920     private static byte[] intArrayToBytes(int[] x) {
921         byte[] b = new byte[x.length * 4];
922         byte[] temp;
923         int i, z;
924 
925         for (i = 0; i < x.length; i++) {
926             temp = intToBytes(x[i]);
927             for (z = 0; z < 4; z++) {
928                 b[(i * 4) + z] = temp[z];
929             }
930         }
931         return b;
932     }
933 
934     /**
935      * calculate the padding to make things align with 32 bits (aka 4 bytes)
936      *
937      * @param x
938      * @return
939      */
pad32(int x)940     private static int pad32(int x) {
941         return ((x % 4) == 0) ? 0 : (4 - (x % 4));
942     }
943 
pad16Bytes(int x)944     private static int pad16Bytes(int x) {
945         return ((x % 16) == 0) ? 0 : (16 - (x % 16));
946     }
947 
948     /**
949      * for printing errors.
950      */
printError(String message)951     private static void printError(String message) {
952 
953         System.err.println("LDML2ICUBinaryWriter : ERROR : " + message);
954     }
955 
createPadding(int length)956     private static byte[] createPadding(int length) {
957         byte x = (byte) 0x00;
958         byte[] b = new byte[length];
959         if (length == 0) {
960             return null;
961         }
962         for (int z = 0; z < b.length; z++) {
963             b[z] = x;
964         }
965 
966         return b;
967     }
968 
removeQuotes(String s)969     public static String removeQuotes(String s) {
970         String temp = s;
971         String temp2;
972         int x;
973         while (temp.indexOf("\"") >= 0) {
974             x = temp.indexOf("\"");
975             temp2 = temp.substring(0, x);
976             temp2 += temp.substring(x + 1, temp.length());
977             temp = temp2;
978         }
979 
980         return temp;
981     }
982 
983 }