• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2003-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 
8 package org.unicode.cldr.icu;
9 
10 import com.ibm.icu.lang.UCharacter;
11 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
12 import com.ibm.icu.text.UTF16;
13 import java.io.File;
14 import java.io.FileNotFoundException;
15 import java.io.FileOutputStream;
16 import java.io.IOException;
17 import java.util.Hashtable;
18 import org.unicode.cldr.util.CldrUtility;
19 
20 /**
21  * The LDML2ICUBinaryWriter class is a set of methods which can be used to generate Binary (.res)
22  * files in the ICU Binary format.
23  *
24  * @author Brian Rower - June 2008
25  */
26 public class LDML2ICUBinaryWriter {
27     /**
28      * This string is the copyright to be written into the file. In the C version, can be found in
29      * <I>icu4c_root</I>/source/common/unicode/uversion.h
30      */
31     private static final String COPYRIGHT = CldrUtility.getCopyrightString();
32 
33     public static int written = 0;
34 
35     /** Magic numbers!!!! */
36     private static final byte MAGIC1 = (byte) 0xda;
37 
38     private static final byte MAGIC2 = 0x27;
39 
40     private static boolean INCLUDE_COPYRIGHT = false;
41     /** The number of bytes it takes to write magic number 1. */
42     private static final short BYTES_TAKEN_BY_MAGIC1 = 1;
43 
44     /** The number of bytes it takes to write magic number 2; */
45     private static final short BYTES_TAKEN_BY_MAGIC2 = 1;
46 
47     /** The number of bytes that it takes to write the size of the header. */
48     private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2;
49 
50     /** The charsets to be used when encoding strings. */
51     public static final String CHARSET8 = "UTF-8";
52 
53     public static final String CHARSET16 = "UTF-16BE";
54 
55     /**
56      * The number of bytes that each character takes up. This is dependant on the encoding (see
57      * CHARSET above).
58      */
59     private static final int BYTES_PER_UTF8_CHAR = 1;
60 
61     /** Numeric constants for special elements. */
62     private static final int SPECIAL_NONE = 0;
63 
64     private static final int SPECIAL_COLLATIONS = 1;
65     private static final int SPECIAL_COLLATIONELEMENTS = 2;
66     private static final int SPECIAL_DEPENDENCY = 3;
67     private static final int SPECIAL_TRANSLITERATOR = 4;
68 
69     /**
70      * Numeric constants for types of resource items.
71      *
72      * @see ures_getType
73      * @stable ICU 2.0
74      */
75 
76     // **************************** ENUM Below is ported from C. See ures.h ***********************
77 
78     /** Resource type constant for "no resource". @stable ICU 2.6 */
79     public static final int URES_NONE = -1;
80 
81     /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
82     public static final int URES_STRING = 0;
83 
84     /** Resource type constant for binary data. @stable ICU 2.6 */
85     public static final int URES_BINARY = 1;
86 
87     /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
88     public static final int URES_TABLE = 2;
89 
90     /**
91      * Resource type constant for aliases; internally stores a string which identifies the actual
92      * resource storing the data (can be in a different resource bundle). Resolved internally before
93      * delivering the actual resource through the API.
94      *
95      * @stable ICU 2.6
96      */
97     public static final int URES_ALIAS = 3;
98 
99     /**
100      * Internal use only. Alternative resource type constant for tables of key-value pairs. Never
101      * returned by ures_getType().
102      *
103      * @internal
104      */
105     public static final int URES_TABLE32 = 4;
106 
107     /**
108      * Resource type constant for a single 28-bit integer, interpreted as signed or unsigned by the
109      * ures_getInt() or ures_getUInt() function.
110      *
111      * @see ures_getInt
112      * @see ures_getUInt
113      * @stable ICU 2.6
114      */
115     public static final int URES_INT = 7;
116 
117     /** Resource type constant for arrays of resources. @stable ICU 2.6 */
118     public static final int URES_ARRAY = 8;
119 
120     /**
121      * Resource type constant for vectors of 32-bit integers.
122      *
123      * @see ures_getIntVector
124      * @stable ICU 2.6
125      */
126     public static final int URES_INT_VECTOR = 14;
127 
128     public static final int URES_LIMIT = 16;
129 
130     /*
131      * The enum below is ported from C. See uresdata.h
132      *
133      * It is used as index references for the array which will be written.
134      */
135     /* [0] contains URES_INDEX_TOP==the length of indexes[] */
136     private static final int URES_INDEX_LENGTH = 0;
137     /* [1] contains the top of the strings, same as the bottom of resources, rounded up */
138     private static final int URES_INDEX_STRINGS_TOP = 1;
139     /* [2] contains the top of all resources */
140     private static final int URES_INDEX_RESOURCES_TOP = 2;
141     /* [3] contains the top of the bundle, in case it were ever different from [2] */
142     private static final int URES_INDEX_BUNDLE_TOP = 3;
143     /* [4] max. length of any table */
144     private static final int URES_INDEX_MAX_TABLE_LENGTH = 4;
145     /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
146     // private static final int URES_INDEX_ATTRIBUTES = 5;
147     /* This one is the length of the array */
148     private static final int URES_INDEX_TOP = 6;
149 
150     // must be set if writing transliteration
151     private static Hashtable<String, String> ruleStringsHash = null;
152 
main()153     public static void main() {}
154 
155     /**
156      * This method is called upon the top of an ICUResourceWriter.Resource in order to write the
157      * whole Resource tree into binary format.
158      *
159      * @param resTop The top of the resource tree that you would like written to file. This object
160      *     should be a ICUResourceWriter.ResourceTable.
161      * @param outDir A string pointing to the path of the output directory.
162      * @param outFile The name of the output file. If filename has an extension other than .res (ex:
163      *     .txt) this method will strip that extention and replace with .res.
164      */
writeBinaryFile( ICUResourceWriter.Resource resTop, String outDir, String outFile)165     public static void writeBinaryFile(
166             ICUResourceWriter.Resource resTop, String outDir, String outFile) {
167         String fileName = "";
168         int usedOffset = 0;
169         String directoryPath = "";
170         FileOutputStream out;
171         UDataInfo info;
172         byte[] dataFormat;
173         byte[] formatVersion;
174         byte[] dataVersion;
175         byte[] padding;
176 
177         // Do some checks on the file name
178         // if it has a period in it...get rid of everything after the period
179         if (outFile.indexOf('.') > -1) {
180             fileName = outFile.substring(0, outFile.indexOf('.'));
181             if (fileName.length() == 0) {
182                 printError(outFile + " is not a valid file name.");
183                 System.exit(1);
184             }
185             fileName = fileName + ".res";
186         } else {
187             fileName = outFile + ".res";
188         }
189         // add the .res part to the file name
190 
191         // do some checks on the directory path
192         // replace all backslashes with forward slashes
193         directoryPath = outDir.replace('\\', '/');
194 
195         // if the path does not end in a slash, then we'll add one
196         if (directoryPath.charAt(directoryPath.length() - 1) != '/') {
197             directoryPath = directoryPath + "/";
198         }
199 
200         // create UDataInfo
201         // Data format is "ResB"
202         dataFormat = new byte[4];
203         dataFormat[0] = 0x52; // R
204         dataFormat[1] = 0x65; // e
205         dataFormat[2] = 0x73; // s
206         dataFormat[3] = 0x42; // B
207 
208         // Format version is 1.2.0.0
209         formatVersion = new byte[4];
210         formatVersion[0] = 1;
211         formatVersion[1] = 2;
212         formatVersion[2] = 0;
213         formatVersion[3] = 0;
214 
215         // data version is 1.4.0.0
216         dataVersion = new byte[4];
217         dataVersion[0] = 1;
218         dataVersion[1] = 4;
219         dataVersion[2] = 0;
220         dataVersion[3] = 0;
221 
222         // now that the file and directory name are formatted, lets try to create an output stream
223         try {
224             System.out.println("Creating file: " + directoryPath + fileName);
225             File f = new File(directoryPath, fileName);
226             out = new FileOutputStream(f);
227 
228             info =
229                     new UDataInfo(
230                             UDataInfo.getSize(),
231                             (short) 0,
232                             UDataInfo.BIGENDIAN,
233                             UDataInfo.ASCII_FAMILY,
234                             UDataInfo.SIZE_OF_UCHAR,
235                             (byte) 0,
236                             dataFormat,
237                             formatVersion,
238                             dataVersion);
239 
240             // this method goes through the tree and looks for a table named CollationElements or
241             // Collations, and adds
242             // the
243             // appropriate data to the tree
244             dealWithSpecialElements(resTop, outDir);
245 
246             // before we do anything with the resources, sort them
247             resTop.sort();
248 
249             // call writeBinaryHeader.
250             writeBinaryHeader(out, info, COPYRIGHT);
251 
252             usedOffset = writeKeyString(out, resTop);
253 
254             // Call writeBinary on the top of the Resource tree
255 
256             usedOffset = resTop.writeBinary(out, usedOffset);
257             padding = createPadding(pad32(usedOffset));
258             if (padding != null) {
259                 out.write(padding);
260                 written += padding.length;
261             }
262             out.close();
263             System.out.println("Finished writing binary.");
264         } catch (FileNotFoundException e) {
265             printError(
266                     directoryPath
267                             + fileName
268                             + " could not be opened, please ensure the correct path is given.");
269             e.printStackTrace();
270             System.exit(1);
271         } catch (SecurityException e) {
272             printError("access denied: " + directoryPath + fileName);
273             e.printStackTrace();
274             System.exit(1);
275         } catch (Exception e) {
276             printError(e.getMessage());
277             System.exit(1);
278         }
279     }
280 
getSpecialType(ICUResourceWriter.Resource res)281     private static int getSpecialType(ICUResourceWriter.Resource res) {
282         if (!res.hasKey) {
283             return SPECIAL_NONE;
284         }
285 
286         if (res.name.equals("CollationElements")
287                 && res instanceof ICUResourceWriter.ResourceTable) {
288             return SPECIAL_COLLATIONELEMENTS;
289         }
290 
291         if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) {
292             return SPECIAL_COLLATIONS;
293         }
294 
295         if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) {
296             return SPECIAL_DEPENDENCY;
297         }
298 
299         if (res instanceof ICUResourceWriter.ResourceProcess) {
300             if (((ICUResourceWriter.ResourceProcess) res)
301                     .ext.equals(ICUResourceWriter.TRANSLITERATOR)) {
302                 return SPECIAL_TRANSLITERATOR;
303             }
304         }
305 
306         return SPECIAL_NONE;
307     }
308 
309     /**
310      * Goes through the resource tree recursively and looks for a table named CollationElements,
311      * collations, dependency, or transliterator and adds the appropriate data
312      *
313      * @param top The top of the Resource Tree
314      */
dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir)315     private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) {
316         // if it's a table
317         if (top instanceof ICUResourceWriter.ResourceTable) {
318             // loop through all it's elements and check if they're anything specialCollationElements
319             // or Collation
320             ICUResourceWriter.Resource cur = top.first;
321             while (cur != null) {
322                 switch (getSpecialType(cur)) {
323                     case SPECIAL_COLLATIONELEMENTS:
324                         addCollation(cur);
325                         break;
326                     case SPECIAL_COLLATIONS:
327                         addCollationElements(cur);
328                         break;
329                     case SPECIAL_DEPENDENCY:
330                         addDependency(
331                                 (ICUResourceWriter.ResourceTable) top,
332                                 (ICUResourceWriter.ResourceProcess) cur,
333                                 outDir);
334                         break;
335                     case SPECIAL_TRANSLITERATOR:
336                         addTransliteration(
337                                 (ICUResourceWriter.ResourceTable) top,
338                                 (ICUResourceWriter.ResourceProcess) cur);
339                         break;
340                     case SPECIAL_NONE:
341                     default:
342                         dealWithSpecialElements(cur, outDir);
343                 }
344 
345                 cur = cur.next;
346             }
347         }
348         // if it's not a table...don't do anything...
349     }
350 
setRulesHash(Hashtable<String, String> hash)351     public static void setRulesHash(Hashtable<String, String> hash) {
352         ruleStringsHash = hash;
353     }
354 
355     // Parallels the C function for parseTransliterator in parse.c of genrb
addTransliteration( ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans)356     private static void addTransliteration(
357             ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans) {
358         if (ruleStringsHash == null) {
359             System.err.println(
360                     "If you are processing transliteration, you must set the Rules Hashtable.");
361             System.exit(-1);
362         }
363 
364         String dataString = ruleStringsHash.get(trans.val);
365 
366         if (dataString == null) {
367             System.err.println("Could not find data for: " + trans.val);
368             System.exit(-1);
369         }
370 
371         // strip out the unneeded stuff from the buffer (like comments and spaces and line breaks
372         dataString = stripRules(dataString);
373 
374         // create a string resource containing the data and add it to the resource tree
375         // remove the ResourceProcess and add the String
376 
377         ICUResourceWriter.ResourceString replacement =
378                 new ICUResourceWriter.ResourceString("Resource", dataString);
379 
380         ICUResourceWriter.Resource current = parent.first;
381 
382         // yes, we're using an address comparison below...because they should both be pointing the
383         // the same object when
384         // we find it.
385         if (current != trans) {
386             while (current != null && current.next != trans) {
387                 current = current.next;
388             }
389             if (current != null) {
390                 replacement.next = trans.next;
391                 current.next = replacement;
392             } else {
393                 System.err.println(
394                         "An unexpected error has occured: Could not find Transliteration resource.");
395                 System.exit(-1);
396             }
397         } else {
398             replacement.next = trans.next;
399             parent.first = replacement;
400         }
401     }
402 
isUWhiteSpace(char c)403     private static boolean isUWhiteSpace(char c) {
404         return (c >= 0x0009
405                 && c <= 0x2029
406                 && (c <= 0x000D
407                         || c == 0x0020
408                         || c == 0x0085
409                         || c == 0x200E
410                         || c == 0x200F
411                         || c >= 0x2028));
412     }
413 
isNewLine(char c)414     private static boolean isNewLine(char c) {
415         if (c == 0x000d || c == 0x000a) {
416             return true;
417         }
418         return false;
419     }
420 
isPunctuation(char c)421     private static boolean isPunctuation(char c) {
422         int x = UCharacter.getType(c);
423         switch (x) {
424             case ECharacterCategory.CONNECTOR_PUNCTUATION:
425             case ECharacterCategory.DASH_PUNCTUATION:
426             case ECharacterCategory.END_PUNCTUATION:
427             case ECharacterCategory.FINAL_PUNCTUATION:
428             case ECharacterCategory.INITIAL_PUNCTUATION:
429             case ECharacterCategory.OTHER_PUNCTUATION:
430             case ECharacterCategory.START_PUNCTUATION:
431                 return true;
432             default:
433                 return false;
434         }
435     }
436 
isControl(char c)437     private static boolean isControl(char c) {
438         int x = UCharacter.getType(c);
439         switch (x) {
440             case ECharacterCategory.CONTROL:
441                 return true;
442             default:
443                 return false;
444         }
445     }
446 
447     // parallels the C++ function utrans_stripRules in rbt_pars.cpp in i18n project
stripRules(String data)448     private static String stripRules(String data) {
449         String newData = "";
450         int currentIndex = 0;
451         char curChar;
452         char curChar2 = '0';
453         boolean needChar2 = false;
454         boolean quoted = false;
455 
456         try {
457 
458             while (currentIndex < data.length()) {
459                 needChar2 = false;
460                 curChar = data.charAt(currentIndex);
461                 // if it's a quote, set the flag
462                 if (curChar == '\'') {
463                     quoted = !quoted;
464                 }
465                 // otherwise...if the quote flag is NOT set.
466                 else if (!quoted) {
467                     // IF comment... ignore comment lines ...starting with #....and until a carriage
468                     // return or line feed
469                     if (curChar == '#') {
470                         // if the preceeding characters were whitepace or new lines, go back and get
471                         // rid of them
472 
473                         while (newData.length() > 0
474                                 && (isNewLine(newData.charAt(newData.length() - 1))
475                                         || isUWhiteSpace(newData.charAt(newData.length() - 1)))) {
476                             if (newData.length() == 1) {
477                                 newData = "";
478                             } else {
479                                 newData = newData.substring(0, newData.length() - 2);
480                             }
481                         }
482 
483                         // move to the end of the line
484                         while (!isNewLine(curChar) && currentIndex < data.length()) {
485                             currentIndex++;
486                             if (currentIndex < data.length()) {
487                                 curChar = data.charAt(currentIndex);
488                             }
489                         }
490                         // grab the first character of this new line (no longer part of the comment
491                         currentIndex++;
492                         if (currentIndex < data.length()) {
493                             curChar = data.charAt(currentIndex);
494                         }
495 
496                     } else if (curChar == '\\') // OR if its an escape char //((UChar)0x005C) - \
497                     {
498                         // skip over the \ and then skip any line breaks that may follow
499                         do {
500                             currentIndex++;
501                             if (currentIndex < data.length()) {
502                                 curChar = data.charAt(currentIndex);
503                             }
504                         } while (isNewLine(curChar) && currentIndex < data.length());
505 
506                         // if it's a u and there are 4 more characters after it
507                         if (curChar == 'u' && (data.length() - currentIndex) >= 4) {
508                             // convert it to a character from a codepoint (String)UTF16.valueOf(int)
509 
510                             String hexString = data.substring(currentIndex + 1, currentIndex + 5);
511                             int codeNum = Integer.parseInt(hexString, 16);
512                             String temp = UTF16.valueOf(codeNum);
513                             char tempChar;
514 
515                             tempChar = temp.charAt(0);
516 
517                             // if its 0xFFFFFFFF
518                             if (tempChar == 0xFFFFFFFF) {
519                                 System.err.println(
520                                         "Invalid character found while processing file.");
521                                 System.exit(-1);
522                             }
523                             // if NOT whitespace(isUWhiteSpace) && NOT a control character? && not
524                             // punctuation
525                             if (!isUWhiteSpace(tempChar)
526                                     && !isPunctuation(tempChar)
527                                     && !isControl(tempChar)) {
528                                 // set the current character to this character
529                                 curChar = tempChar;
530                                 currentIndex +=
531                                         4; // the 4 numbers...will add one more for the u, already
532                                 // did one for
533                                 // the slash
534                                 if (temp.length() > 1) {
535                                     curChar2 = temp.charAt(1);
536                                     needChar2 = true;
537                                 }
538                             }
539                         }
540 
541                     } else if (curChar == '\'') // OR if it's a quote
542                     {
543                         quoted = !quoted;
544                     }
545                 } // end not quoted
546 
547                 if (isNewLine(curChar)) {
548                     quoted = false;
549                     // while we're not hitting the end of the string
550                     while (currentIndex < data.length()) {
551                         if (!isNewLine(curChar)) {
552                             break;
553                         }
554                         currentIndex++;
555                         if (currentIndex < data.length()) {
556                             curChar = data.charAt(currentIndex);
557                         }
558                     }
559                     continue;
560                 }
561 
562                 // append the character to the new string, because we've decided it's ok
563                 newData += curChar;
564                 currentIndex++;
565                 if (needChar2) {
566                     newData += curChar2;
567                 }
568             } // end loop
569 
570         } catch (Exception e) {
571             System.err.println("Had a problem...");
572         }
573         if (newData.length() > data.length()) {
574             return null;
575         }
576         return newData;
577     }
578 
addDependency( ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, String outDir)579     private static void addDependency(
580             ICUResourceWriter.ResourceTable parent,
581             ICUResourceWriter.ResourceProcess dep,
582             String outDir) {
583         String filename;
584         File f;
585 
586         filename = outDir;
587         if (!(outDir.charAt(outDir.length() - 1) == '/'
588                 || outDir.charAt(outDir.length() - 1) == '\\')) {
589             filename += "/";
590         }
591 
592         filename += dep.val;
593 
594         f = new File(filename);
595         if (!f.exists()) {
596             System.err.println("WARNING: Could not find dependancy: " + filename);
597         }
598         // create the %%DEPENDENCY array with a string containing the path, add it to the table.
599         ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray();
600         a.name = "%%DEPENDENCY";
601         ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val);
602         a.first = str;
603         dep.addAfter(a);
604 
605         // Remove the ResourceProcess object and replace it with a ResourceString object.
606         ICUResourceWriter.ResourceString replacement =
607                 new ICUResourceWriter.ResourceString(dep.name, dep.val);
608 
609         ICUResourceWriter.Resource current = parent.first;
610 
611         // yes, we're using an address comparison below...because they should both be pointing the
612         // the same object when
613         // we find it.
614         while (current != null && current.next != dep) {
615             current = current.next;
616         }
617         replacement.next = dep.next;
618         current.next = replacement;
619     }
620 
addCollationElements(ICUResourceWriter.Resource elementTable)621     private static void addCollationElements(ICUResourceWriter.Resource elementTable) {
622         // Element table name is "Collation"
623         // loops through sub tables of Collation and adds CollationBinary as nessisary
624         ICUResourceWriter.Resource cur = elementTable.first;
625 
626         while (cur != null) {
627             addCollation(cur);
628             cur = cur.next;
629         }
630     }
631 
addCollation(ICUResourceWriter.Resource element)632     private static void addCollation(ICUResourceWriter.Resource element) {
633         ICUResourceWriter.Resource cur = element.first;
634 
635         while (cur != null) {
636             if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) {
637                 ICUResourceWriter.ResourceString strElement =
638                         (ICUResourceWriter.ResourceString) cur;
639 
640                 if (strElement.name.equals("Sequence")) {
641                     try {
642                         // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val);
643                         // TODO Generate proper binary data for Collator
644                         /*
645                          * currently CollatorWriter does not work properly
646                          * Need to write something to generate proper bytes,
647                          * bytes do not seem to exist at this time
648                          * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower
649                          * subdirectory of icu4j in the IBM local cvs
650                          */
651                         // byte[] bytes = CollatorWriter.writeRBC(rbc);
652                         // ICUResourceWriter.ResourceBinary b = new
653                         // ICUResourceWriter.ResourceBinary();
654                         // b.data = bytes;
655                         // b.name = "%%CollationBin";
656                         // element.addAfter(b);
657 
658                     } catch (Exception e) {
659                         System.err.println("Could not create Collation Binary");
660                     }
661                 }
662             }
663             cur = cur.next;
664         }
665     }
666 
667     /**
668      * Write the header section of the file. This section of the file currently contains:<br>
669      * -A 2 byte number containing the length (in bytes) of the header.<br>
670      * -Two "magic numbers" each 1 byte in size.<br>
671      * -The UDataInfo structure -The null terminated copyright string (if it should be written)
672      *
673      * @param out
674      * @param info
675      * @param copyright
676      */
writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright)677     private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) {
678         short headSize = 0;
679         byte[] magics = new byte[2];
680         int pad = 0;
681         byte[] padding;
682         /*
683          * The header includes a 2 byte number containing the size of the header,
684          * two magic numbers each 1 byte in size, the UDataInfo structure, and the
685          * copyright plus null terminator. Subject to change.
686          */
687         headSize +=
688                 info.size
689                         + BYTES_TAKEN_BY_HEADER_SIZE
690                         + BYTES_TAKEN_BY_MAGIC1
691                         + BYTES_TAKEN_BY_MAGIC2;
692         if (copyright != null && INCLUDE_COPYRIGHT) {
693             headSize += copyright.length() + 1;
694         }
695         if ((pad = pad16Bytes(headSize)) != 0) {
696             headSize += pad;
697         }
698 
699         magics[0] = MAGIC1;
700         magics[1] = MAGIC2;
701 
702         try {
703             // write the size of the header
704             out.write(shortToBytes(headSize));
705             written += (shortToBytes(headSize)).length;
706 
707             // write the two magic numbers
708             out.write(magics);
709             written += magics.length;
710 
711             // write the UDataInfo structure
712             out.write(info.getByteArray());
713             written += info.getByteArray().length;
714 
715             // write the copyright and null terminating byte(s) if writing it
716             if (copyright != null && INCLUDE_COPYRIGHT) {
717                 out.write((copyright + "\0").getBytes(CHARSET8));
718                 written += ((copyright + "\0").getBytes(CHARSET8)).length;
719             }
720 
721             if (pad != 0) {
722                 padding = new byte[pad];
723                 for (int i = 0; i < padding.length; i++) {
724                     padding[i] = 0;
725                 }
726                 out.write(padding);
727                 written += padding.length;
728             }
729 
730         } catch (IOException e) {
731             printError(e.getMessage());
732             e.printStackTrace();
733             System.exit(1);
734         }
735     }
736 
737     /**
738      * Write some information about the key string and then write a chunk of bytes which mirrors the
739      * SRBRoot->fkeys character buffer. This will be a list of null terminated strings. Each string
740      * pertains to a certain resource. This method also modifies the resources in 'resTop' by
741      * setting the keyStringOffset variable. The keyStringOffset variable is the number of bytes
742      * from the start of the key string that the resources key starts. For example:
743      *
744      * <p>In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table
745      * contains a string resource with the key "1.31."
746      *
747      * <p>If this were the whole of the locale data, the key string would be an encoded version of
748      * this: "Version\01.31\0" <br>
749      * <br>
750      * In UTF-16 encoding, each character will take 2 bytes. <br>
751      * keyStringOffset for the table object would be 0. <br>
752      * keyStringOffset for the string resource would be = "Version".length() + 2 = 16
753      *
754      * @param out The output stream to write this to.
755      * @param resTop The top of the resource tree whose keys shall be written
756      */
writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop)757     private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) {
758         String keyList = "";
759         byte[] padding = null;
760         int padBytes = 0;
761         int end;
762         int root;
763         byte[] rootBytes;
764         int[] indexes = new int[URES_INDEX_TOP];
765         byte[] indexBytes = new byte[URES_INDEX_TOP * 4];
766         byte[] keyBytes;
767         int usedOffset;
768         int sizeOfIndexes;
769         int sizeOfIndexesAndKeys;
770         int tableID;
771 
772         // set flag so that we know which resource is the top of the tree
773         resTop.isTop = true;
774 
775         sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT;
776 
777         usedOffset = sizeOfIndexes;
778 
779         // Build the String of keys
780         keyList = buildKeyList(keyList, resTop, usedOffset);
781 
782         sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length();
783         usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys);
784 
785         end = sizeOfIndexesAndKeys + resTop.sizeOfChildren;
786 
787         // if it is not 16 byte aligned
788         if ((padBytes = pad32(sizeOfIndexesAndKeys)) != 0) {
789             padding = createPadding(padBytes);
790             if (padding != null) {
791                 usedOffset += padding.length;
792                 end += padding.length;
793             }
794         }
795 
796         // build a set of 32 bits (in C this variable is called 'root' in reslist.c)
797         // the number of bytes included in the keyList, keyList padding, all the children
798 
799         if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) {
800             tableID = (URES_TABLE32 << 28);
801         } else {
802             tableID = (URES_TABLE << 28);
803         }
804         root = (end >>> 2) | (tableID);
805 
806         rootBytes = intToBytes(root);
807 
808         end += resTop.size;
809 
810         end += pad32(end);
811 
812         indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP;
813         indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2;
814         indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2;
815         indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP];
816         indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength;
817 
818         indexBytes = intArrayToBytes(indexes);
819 
820         try {
821             // write the "root" object
822             out.write(rootBytes);
823             written += rootBytes.length;
824 
825             // write the indexes array
826             out.write(indexBytes);
827             written += indexBytes.length;
828 
829             // write the keyList and padding if nessicary
830             keyBytes = keyList.getBytes(CHARSET8);
831             out.write(keyBytes);
832             written += keyBytes.length;
833 
834             if (padding != null) {
835                 out.write(padding);
836                 written += padding.length;
837             }
838         } catch (IOException e) {
839             printError("Could not write key string to file. " + e.getMessage());
840             System.exit(1);
841         }
842 
843         return usedOffset;
844     }
845 
846     /**
847      * Recursively go through the whole tree and continue to add to the keyList. As this is done,
848      * set the keyStringOffset, numChildren, sizeOfChildren, and size variables.
849      *
850      * @param keyList The current string of keys.
851      * @param resTop The resource whose keys shall be written to the keyList.
852      * @return
853      */
buildKeyList( String keyList, ICUResourceWriter.Resource resTop, int usedOffset)854     private static String buildKeyList(
855             String keyList, ICUResourceWriter.Resource resTop, int usedOffset) {
856         ICUResourceWriter.Resource current = resTop.first;
857         int x = 0;
858 
859         // add this resources key to the list unless it is the top resource or doesn't have a key
860         if (!resTop.isTop && resTop.hasKey) {
861             // clean up quotes if any
862             if (resTop.name.indexOf("\"") >= 0) {
863                 resTop.name = removeQuotes(resTop.name);
864             }
865             // set the keyStringOffset
866             resTop.keyStringOffset = usedOffset + (keyList.length() * BYTES_PER_UTF8_CHAR);
867             keyList += (resTop.name + "\0");
868         }
869 
870         // if it has children, call this method on them too
871         while (current != null) {
872             if (resTop instanceof ICUResourceWriter.ResourceArray
873                     || resTop instanceof ICUResourceWriter.ResourceIntVector) {
874                 current.hasKey = false;
875             }
876 
877             keyList = buildKeyList(keyList, current, usedOffset);
878             x++;
879 
880             // add the size of the current child to the parents sizeOfChildren
881 
882             current = current.next;
883         }
884 
885         // set the size of this object
886         resTop.setSize();
887 
888         resTop.numChildren = x;
889         return keyList;
890     }
891 
892     /**
893      * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is
894      * upper byte. Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return: [0] =
895      * 0110 0000 or 0x60 [1] = 0110 1101 or 0x6D
896      */
shortToBytes(short x)897     private static byte[] shortToBytes(short x) {
898         byte[] b = new byte[2];
899         b[1] = (byte) (x); // bitwise AND with the lower byte
900         b[0] = (byte) (x >>> 8); // shift four bits to the right and fill with zeros, and
901         // then bitwise and with the
902         // lower byte
903         return b;
904     }
905 
906     /** Takes a 32 bit integer and returns an array of 4 bytes. */
intToBytes(int x)907     private static byte[] intToBytes(int x) {
908         byte[] b = new byte[4];
909         b[3] = (byte) (x); // just the last byte
910 
911         x = x >>> 8; // shift each byte over one spot.
912         b[2] = (byte) (x); // just the last byte
913 
914         x = x >>> 8; // shift each byte over one spot.
915         b[1] = (byte) (x); // just the last byte
916 
917         x = x >>> 8; // shift each byte over one spot.
918         b[0] = (byte) (x); // just the last byte
919 
920         return b;
921     }
922 
923     /**
924      * Takes an array of integers and returns a byte array of the memory representation.
925      *
926      * @param x
927      * @return
928      */
intArrayToBytes(int[] x)929     private static byte[] intArrayToBytes(int[] x) {
930         byte[] b = new byte[x.length * 4];
931         byte[] temp;
932         int i, z;
933 
934         for (i = 0; i < x.length; i++) {
935             temp = intToBytes(x[i]);
936             for (z = 0; z < 4; z++) {
937                 b[(i * 4) + z] = temp[z];
938             }
939         }
940         return b;
941     }
942 
943     /**
944      * calculate the padding to make things align with 32 bits (aka 4 bytes)
945      *
946      * @param x
947      * @return
948      */
pad32(int x)949     private static int pad32(int x) {
950         return ((x % 4) == 0) ? 0 : (4 - (x % 4));
951     }
952 
pad16Bytes(int x)953     private static int pad16Bytes(int x) {
954         return ((x % 16) == 0) ? 0 : (16 - (x % 16));
955     }
956 
957     /** for printing errors. */
printError(String message)958     private static void printError(String message) {
959 
960         System.err.println("LDML2ICUBinaryWriter : ERROR : " + message);
961     }
962 
createPadding(int length)963     private static byte[] createPadding(int length) {
964         byte x = (byte) 0x00;
965         byte[] b = new byte[length];
966         if (length == 0) {
967             return null;
968         }
969         for (int z = 0; z < b.length; z++) {
970             b[z] = x;
971         }
972 
973         return b;
974     }
975 
removeQuotes(String s)976     public static String removeQuotes(String s) {
977         String temp = s;
978         String temp2;
979         int x;
980         while (temp.indexOf("\"") >= 0) {
981             x = temp.indexOf("\"");
982             temp2 = temp.substring(0, x);
983             temp2 += temp.substring(x + 1, temp.length());
984             temp = temp2;
985         }
986 
987         return temp;
988     }
989 }
990