1 /* 2 ******************************************************************************* 3 * Copyright (C) 2003-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 package org.unicode.cldr.icu; 9 10 import com.ibm.icu.lang.UCharacter; 11 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 12 import com.ibm.icu.text.UTF16; 13 import java.io.File; 14 import java.io.FileNotFoundException; 15 import java.io.FileOutputStream; 16 import java.io.IOException; 17 import java.util.Hashtable; 18 import org.unicode.cldr.util.CldrUtility; 19 20 /** 21 * The LDML2ICUBinaryWriter class is a set of methods which can be used to generate Binary (.res) 22 * files in the ICU Binary format. 23 * 24 * @author Brian Rower - June 2008 25 */ 26 public class LDML2ICUBinaryWriter { 27 /** 28 * This string is the copyright to be written into the file. In the C version, can be found in 29 * <I>icu4c_root</I>/source/common/unicode/uversion.h 30 */ 31 private static final String COPYRIGHT = CldrUtility.getCopyrightString(); 32 33 public static int written = 0; 34 35 /** Magic numbers!!!! */ 36 private static final byte MAGIC1 = (byte) 0xda; 37 38 private static final byte MAGIC2 = 0x27; 39 40 private static boolean INCLUDE_COPYRIGHT = false; 41 /** The number of bytes it takes to write magic number 1. */ 42 private static final short BYTES_TAKEN_BY_MAGIC1 = 1; 43 44 /** The number of bytes it takes to write magic number 2; */ 45 private static final short BYTES_TAKEN_BY_MAGIC2 = 1; 46 47 /** The number of bytes that it takes to write the size of the header. */ 48 private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2; 49 50 /** The charsets to be used when encoding strings. */ 51 public static final String CHARSET8 = "UTF-8"; 52 53 public static final String CHARSET16 = "UTF-16BE"; 54 55 /** 56 * The number of bytes that each character takes up. This is dependant on the encoding (see 57 * CHARSET above). 58 */ 59 private static final int BYTES_PER_UTF8_CHAR = 1; 60 61 /** Numeric constants for special elements. */ 62 private static final int SPECIAL_NONE = 0; 63 64 private static final int SPECIAL_COLLATIONS = 1; 65 private static final int SPECIAL_COLLATIONELEMENTS = 2; 66 private static final int SPECIAL_DEPENDENCY = 3; 67 private static final int SPECIAL_TRANSLITERATOR = 4; 68 69 /** 70 * Numeric constants for types of resource items. 71 * 72 * @see ures_getType 73 * @stable ICU 2.0 74 */ 75 76 // **************************** ENUM Below is ported from C. See ures.h *********************** 77 78 /** Resource type constant for "no resource". @stable ICU 2.6 */ 79 public static final int URES_NONE = -1; 80 81 /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */ 82 public static final int URES_STRING = 0; 83 84 /** Resource type constant for binary data. @stable ICU 2.6 */ 85 public static final int URES_BINARY = 1; 86 87 /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */ 88 public static final int URES_TABLE = 2; 89 90 /** 91 * Resource type constant for aliases; internally stores a string which identifies the actual 92 * resource storing the data (can be in a different resource bundle). Resolved internally before 93 * delivering the actual resource through the API. 94 * 95 * @stable ICU 2.6 96 */ 97 public static final int URES_ALIAS = 3; 98 99 /** 100 * Internal use only. Alternative resource type constant for tables of key-value pairs. Never 101 * returned by ures_getType(). 102 * 103 * @internal 104 */ 105 public static final int URES_TABLE32 = 4; 106 107 /** 108 * Resource type constant for a single 28-bit integer, interpreted as signed or unsigned by the 109 * ures_getInt() or ures_getUInt() function. 110 * 111 * @see ures_getInt 112 * @see ures_getUInt 113 * @stable ICU 2.6 114 */ 115 public static final int URES_INT = 7; 116 117 /** Resource type constant for arrays of resources. @stable ICU 2.6 */ 118 public static final int URES_ARRAY = 8; 119 120 /** 121 * Resource type constant for vectors of 32-bit integers. 122 * 123 * @see ures_getIntVector 124 * @stable ICU 2.6 125 */ 126 public static final int URES_INT_VECTOR = 14; 127 128 public static final int URES_LIMIT = 16; 129 130 /* 131 * The enum below is ported from C. See uresdata.h 132 * 133 * It is used as index references for the array which will be written. 134 */ 135 /* [0] contains URES_INDEX_TOP==the length of indexes[] */ 136 private static final int URES_INDEX_LENGTH = 0; 137 /* [1] contains the top of the strings, same as the bottom of resources, rounded up */ 138 private static final int URES_INDEX_STRINGS_TOP = 1; 139 /* [2] contains the top of all resources */ 140 private static final int URES_INDEX_RESOURCES_TOP = 2; 141 /* [3] contains the top of the bundle, in case it were ever different from [2] */ 142 private static final int URES_INDEX_BUNDLE_TOP = 3; 143 /* [4] max. length of any table */ 144 private static final int URES_INDEX_MAX_TABLE_LENGTH = 4; 145 /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */ 146 // private static final int URES_INDEX_ATTRIBUTES = 5; 147 /* This one is the length of the array */ 148 private static final int URES_INDEX_TOP = 6; 149 150 // must be set if writing transliteration 151 private static Hashtable<String, String> ruleStringsHash = null; 152 main()153 public static void main() {} 154 155 /** 156 * This method is called upon the top of an ICUResourceWriter.Resource in order to write the 157 * whole Resource tree into binary format. 158 * 159 * @param resTop The top of the resource tree that you would like written to file. This object 160 * should be a ICUResourceWriter.ResourceTable. 161 * @param outDir A string pointing to the path of the output directory. 162 * @param outFile The name of the output file. If filename has an extension other than .res (ex: 163 * .txt) this method will strip that extention and replace with .res. 164 */ writeBinaryFile( ICUResourceWriter.Resource resTop, String outDir, String outFile)165 public static void writeBinaryFile( 166 ICUResourceWriter.Resource resTop, String outDir, String outFile) { 167 String fileName = ""; 168 int usedOffset = 0; 169 String directoryPath = ""; 170 FileOutputStream out; 171 UDataInfo info; 172 byte[] dataFormat; 173 byte[] formatVersion; 174 byte[] dataVersion; 175 byte[] padding; 176 177 // Do some checks on the file name 178 // if it has a period in it...get rid of everything after the period 179 if (outFile.indexOf('.') > -1) { 180 fileName = outFile.substring(0, outFile.indexOf('.')); 181 if (fileName.length() == 0) { 182 printError(outFile + " is not a valid file name."); 183 System.exit(1); 184 } 185 fileName = fileName + ".res"; 186 } else { 187 fileName = outFile + ".res"; 188 } 189 // add the .res part to the file name 190 191 // do some checks on the directory path 192 // replace all backslashes with forward slashes 193 directoryPath = outDir.replace('\\', '/'); 194 195 // if the path does not end in a slash, then we'll add one 196 if (directoryPath.charAt(directoryPath.length() - 1) != '/') { 197 directoryPath = directoryPath + "/"; 198 } 199 200 // create UDataInfo 201 // Data format is "ResB" 202 dataFormat = new byte[4]; 203 dataFormat[0] = 0x52; // R 204 dataFormat[1] = 0x65; // e 205 dataFormat[2] = 0x73; // s 206 dataFormat[3] = 0x42; // B 207 208 // Format version is 1.2.0.0 209 formatVersion = new byte[4]; 210 formatVersion[0] = 1; 211 formatVersion[1] = 2; 212 formatVersion[2] = 0; 213 formatVersion[3] = 0; 214 215 // data version is 1.4.0.0 216 dataVersion = new byte[4]; 217 dataVersion[0] = 1; 218 dataVersion[1] = 4; 219 dataVersion[2] = 0; 220 dataVersion[3] = 0; 221 222 // now that the file and directory name are formatted, lets try to create an output stream 223 try { 224 System.out.println("Creating file: " + directoryPath + fileName); 225 File f = new File(directoryPath, fileName); 226 out = new FileOutputStream(f); 227 228 info = 229 new UDataInfo( 230 UDataInfo.getSize(), 231 (short) 0, 232 UDataInfo.BIGENDIAN, 233 UDataInfo.ASCII_FAMILY, 234 UDataInfo.SIZE_OF_UCHAR, 235 (byte) 0, 236 dataFormat, 237 formatVersion, 238 dataVersion); 239 240 // this method goes through the tree and looks for a table named CollationElements or 241 // Collations, and adds 242 // the 243 // appropriate data to the tree 244 dealWithSpecialElements(resTop, outDir); 245 246 // before we do anything with the resources, sort them 247 resTop.sort(); 248 249 // call writeBinaryHeader. 250 writeBinaryHeader(out, info, COPYRIGHT); 251 252 usedOffset = writeKeyString(out, resTop); 253 254 // Call writeBinary on the top of the Resource tree 255 256 usedOffset = resTop.writeBinary(out, usedOffset); 257 padding = createPadding(pad32(usedOffset)); 258 if (padding != null) { 259 out.write(padding); 260 written += padding.length; 261 } 262 out.close(); 263 System.out.println("Finished writing binary."); 264 } catch (FileNotFoundException e) { 265 printError( 266 directoryPath 267 + fileName 268 + " could not be opened, please ensure the correct path is given."); 269 e.printStackTrace(); 270 System.exit(1); 271 } catch (SecurityException e) { 272 printError("access denied: " + directoryPath + fileName); 273 e.printStackTrace(); 274 System.exit(1); 275 } catch (Exception e) { 276 printError(e.getMessage()); 277 System.exit(1); 278 } 279 } 280 getSpecialType(ICUResourceWriter.Resource res)281 private static int getSpecialType(ICUResourceWriter.Resource res) { 282 if (!res.hasKey) { 283 return SPECIAL_NONE; 284 } 285 286 if (res.name.equals("CollationElements") 287 && res instanceof ICUResourceWriter.ResourceTable) { 288 return SPECIAL_COLLATIONELEMENTS; 289 } 290 291 if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) { 292 return SPECIAL_COLLATIONS; 293 } 294 295 if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) { 296 return SPECIAL_DEPENDENCY; 297 } 298 299 if (res instanceof ICUResourceWriter.ResourceProcess) { 300 if (((ICUResourceWriter.ResourceProcess) res) 301 .ext.equals(ICUResourceWriter.TRANSLITERATOR)) { 302 return SPECIAL_TRANSLITERATOR; 303 } 304 } 305 306 return SPECIAL_NONE; 307 } 308 309 /** 310 * Goes through the resource tree recursively and looks for a table named CollationElements, 311 * collations, dependency, or transliterator and adds the appropriate data 312 * 313 * @param top The top of the Resource Tree 314 */ dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir)315 private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) { 316 // if it's a table 317 if (top instanceof ICUResourceWriter.ResourceTable) { 318 // loop through all it's elements and check if they're anything specialCollationElements 319 // or Collation 320 ICUResourceWriter.Resource cur = top.first; 321 while (cur != null) { 322 switch (getSpecialType(cur)) { 323 case SPECIAL_COLLATIONELEMENTS: 324 addCollation(cur); 325 break; 326 case SPECIAL_COLLATIONS: 327 addCollationElements(cur); 328 break; 329 case SPECIAL_DEPENDENCY: 330 addDependency( 331 (ICUResourceWriter.ResourceTable) top, 332 (ICUResourceWriter.ResourceProcess) cur, 333 outDir); 334 break; 335 case SPECIAL_TRANSLITERATOR: 336 addTransliteration( 337 (ICUResourceWriter.ResourceTable) top, 338 (ICUResourceWriter.ResourceProcess) cur); 339 break; 340 case SPECIAL_NONE: 341 default: 342 dealWithSpecialElements(cur, outDir); 343 } 344 345 cur = cur.next; 346 } 347 } 348 // if it's not a table...don't do anything... 349 } 350 setRulesHash(Hashtable<String, String> hash)351 public static void setRulesHash(Hashtable<String, String> hash) { 352 ruleStringsHash = hash; 353 } 354 355 // Parallels the C function for parseTransliterator in parse.c of genrb addTransliteration( ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans)356 private static void addTransliteration( 357 ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans) { 358 if (ruleStringsHash == null) { 359 System.err.println( 360 "If you are processing transliteration, you must set the Rules Hashtable."); 361 System.exit(-1); 362 } 363 364 String dataString = ruleStringsHash.get(trans.val); 365 366 if (dataString == null) { 367 System.err.println("Could not find data for: " + trans.val); 368 System.exit(-1); 369 } 370 371 // strip out the unneeded stuff from the buffer (like comments and spaces and line breaks 372 dataString = stripRules(dataString); 373 374 // create a string resource containing the data and add it to the resource tree 375 // remove the ResourceProcess and add the String 376 377 ICUResourceWriter.ResourceString replacement = 378 new ICUResourceWriter.ResourceString("Resource", dataString); 379 380 ICUResourceWriter.Resource current = parent.first; 381 382 // yes, we're using an address comparison below...because they should both be pointing the 383 // the same object when 384 // we find it. 385 if (current != trans) { 386 while (current != null && current.next != trans) { 387 current = current.next; 388 } 389 if (current != null) { 390 replacement.next = trans.next; 391 current.next = replacement; 392 } else { 393 System.err.println( 394 "An unexpected error has occured: Could not find Transliteration resource."); 395 System.exit(-1); 396 } 397 } else { 398 replacement.next = trans.next; 399 parent.first = replacement; 400 } 401 } 402 isUWhiteSpace(char c)403 private static boolean isUWhiteSpace(char c) { 404 return (c >= 0x0009 405 && c <= 0x2029 406 && (c <= 0x000D 407 || c == 0x0020 408 || c == 0x0085 409 || c == 0x200E 410 || c == 0x200F 411 || c >= 0x2028)); 412 } 413 isNewLine(char c)414 private static boolean isNewLine(char c) { 415 if (c == 0x000d || c == 0x000a) { 416 return true; 417 } 418 return false; 419 } 420 isPunctuation(char c)421 private static boolean isPunctuation(char c) { 422 int x = UCharacter.getType(c); 423 switch (x) { 424 case ECharacterCategory.CONNECTOR_PUNCTUATION: 425 case ECharacterCategory.DASH_PUNCTUATION: 426 case ECharacterCategory.END_PUNCTUATION: 427 case ECharacterCategory.FINAL_PUNCTUATION: 428 case ECharacterCategory.INITIAL_PUNCTUATION: 429 case ECharacterCategory.OTHER_PUNCTUATION: 430 case ECharacterCategory.START_PUNCTUATION: 431 return true; 432 default: 433 return false; 434 } 435 } 436 isControl(char c)437 private static boolean isControl(char c) { 438 int x = UCharacter.getType(c); 439 switch (x) { 440 case ECharacterCategory.CONTROL: 441 return true; 442 default: 443 return false; 444 } 445 } 446 447 // parallels the C++ function utrans_stripRules in rbt_pars.cpp in i18n project stripRules(String data)448 private static String stripRules(String data) { 449 String newData = ""; 450 int currentIndex = 0; 451 char curChar; 452 char curChar2 = '0'; 453 boolean needChar2 = false; 454 boolean quoted = false; 455 456 try { 457 458 while (currentIndex < data.length()) { 459 needChar2 = false; 460 curChar = data.charAt(currentIndex); 461 // if it's a quote, set the flag 462 if (curChar == '\'') { 463 quoted = !quoted; 464 } 465 // otherwise...if the quote flag is NOT set. 466 else if (!quoted) { 467 // IF comment... ignore comment lines ...starting with #....and until a carriage 468 // return or line feed 469 if (curChar == '#') { 470 // if the preceeding characters were whitepace or new lines, go back and get 471 // rid of them 472 473 while (newData.length() > 0 474 && (isNewLine(newData.charAt(newData.length() - 1)) 475 || isUWhiteSpace(newData.charAt(newData.length() - 1)))) { 476 if (newData.length() == 1) { 477 newData = ""; 478 } else { 479 newData = newData.substring(0, newData.length() - 2); 480 } 481 } 482 483 // move to the end of the line 484 while (!isNewLine(curChar) && currentIndex < data.length()) { 485 currentIndex++; 486 if (currentIndex < data.length()) { 487 curChar = data.charAt(currentIndex); 488 } 489 } 490 // grab the first character of this new line (no longer part of the comment 491 currentIndex++; 492 if (currentIndex < data.length()) { 493 curChar = data.charAt(currentIndex); 494 } 495 496 } else if (curChar == '\\') // OR if its an escape char //((UChar)0x005C) - \ 497 { 498 // skip over the \ and then skip any line breaks that may follow 499 do { 500 currentIndex++; 501 if (currentIndex < data.length()) { 502 curChar = data.charAt(currentIndex); 503 } 504 } while (isNewLine(curChar) && currentIndex < data.length()); 505 506 // if it's a u and there are 4 more characters after it 507 if (curChar == 'u' && (data.length() - currentIndex) >= 4) { 508 // convert it to a character from a codepoint (String)UTF16.valueOf(int) 509 510 String hexString = data.substring(currentIndex + 1, currentIndex + 5); 511 int codeNum = Integer.parseInt(hexString, 16); 512 String temp = UTF16.valueOf(codeNum); 513 char tempChar; 514 515 tempChar = temp.charAt(0); 516 517 // if its 0xFFFFFFFF 518 if (tempChar == 0xFFFFFFFF) { 519 System.err.println( 520 "Invalid character found while processing file."); 521 System.exit(-1); 522 } 523 // if NOT whitespace(isUWhiteSpace) && NOT a control character? && not 524 // punctuation 525 if (!isUWhiteSpace(tempChar) 526 && !isPunctuation(tempChar) 527 && !isControl(tempChar)) { 528 // set the current character to this character 529 curChar = tempChar; 530 currentIndex += 531 4; // the 4 numbers...will add one more for the u, already 532 // did one for 533 // the slash 534 if (temp.length() > 1) { 535 curChar2 = temp.charAt(1); 536 needChar2 = true; 537 } 538 } 539 } 540 541 } else if (curChar == '\'') // OR if it's a quote 542 { 543 quoted = !quoted; 544 } 545 } // end not quoted 546 547 if (isNewLine(curChar)) { 548 quoted = false; 549 // while we're not hitting the end of the string 550 while (currentIndex < data.length()) { 551 if (!isNewLine(curChar)) { 552 break; 553 } 554 currentIndex++; 555 if (currentIndex < data.length()) { 556 curChar = data.charAt(currentIndex); 557 } 558 } 559 continue; 560 } 561 562 // append the character to the new string, because we've decided it's ok 563 newData += curChar; 564 currentIndex++; 565 if (needChar2) { 566 newData += curChar2; 567 } 568 } // end loop 569 570 } catch (Exception e) { 571 System.err.println("Had a problem..."); 572 } 573 if (newData.length() > data.length()) { 574 return null; 575 } 576 return newData; 577 } 578 addDependency( ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, String outDir)579 private static void addDependency( 580 ICUResourceWriter.ResourceTable parent, 581 ICUResourceWriter.ResourceProcess dep, 582 String outDir) { 583 String filename; 584 File f; 585 586 filename = outDir; 587 if (!(outDir.charAt(outDir.length() - 1) == '/' 588 || outDir.charAt(outDir.length() - 1) == '\\')) { 589 filename += "/"; 590 } 591 592 filename += dep.val; 593 594 f = new File(filename); 595 if (!f.exists()) { 596 System.err.println("WARNING: Could not find dependancy: " + filename); 597 } 598 // create the %%DEPENDENCY array with a string containing the path, add it to the table. 599 ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray(); 600 a.name = "%%DEPENDENCY"; 601 ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val); 602 a.first = str; 603 dep.addAfter(a); 604 605 // Remove the ResourceProcess object and replace it with a ResourceString object. 606 ICUResourceWriter.ResourceString replacement = 607 new ICUResourceWriter.ResourceString(dep.name, dep.val); 608 609 ICUResourceWriter.Resource current = parent.first; 610 611 // yes, we're using an address comparison below...because they should both be pointing the 612 // the same object when 613 // we find it. 614 while (current != null && current.next != dep) { 615 current = current.next; 616 } 617 replacement.next = dep.next; 618 current.next = replacement; 619 } 620 addCollationElements(ICUResourceWriter.Resource elementTable)621 private static void addCollationElements(ICUResourceWriter.Resource elementTable) { 622 // Element table name is "Collation" 623 // loops through sub tables of Collation and adds CollationBinary as nessisary 624 ICUResourceWriter.Resource cur = elementTable.first; 625 626 while (cur != null) { 627 addCollation(cur); 628 cur = cur.next; 629 } 630 } 631 addCollation(ICUResourceWriter.Resource element)632 private static void addCollation(ICUResourceWriter.Resource element) { 633 ICUResourceWriter.Resource cur = element.first; 634 635 while (cur != null) { 636 if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) { 637 ICUResourceWriter.ResourceString strElement = 638 (ICUResourceWriter.ResourceString) cur; 639 640 if (strElement.name.equals("Sequence")) { 641 try { 642 // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val); 643 // TODO Generate proper binary data for Collator 644 /* 645 * currently CollatorWriter does not work properly 646 * Need to write something to generate proper bytes, 647 * bytes do not seem to exist at this time 648 * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower 649 * subdirectory of icu4j in the IBM local cvs 650 */ 651 // byte[] bytes = CollatorWriter.writeRBC(rbc); 652 // ICUResourceWriter.ResourceBinary b = new 653 // ICUResourceWriter.ResourceBinary(); 654 // b.data = bytes; 655 // b.name = "%%CollationBin"; 656 // element.addAfter(b); 657 658 } catch (Exception e) { 659 System.err.println("Could not create Collation Binary"); 660 } 661 } 662 } 663 cur = cur.next; 664 } 665 } 666 667 /** 668 * Write the header section of the file. This section of the file currently contains:<br> 669 * -A 2 byte number containing the length (in bytes) of the header.<br> 670 * -Two "magic numbers" each 1 byte in size.<br> 671 * -The UDataInfo structure -The null terminated copyright string (if it should be written) 672 * 673 * @param out 674 * @param info 675 * @param copyright 676 */ writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright)677 private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) { 678 short headSize = 0; 679 byte[] magics = new byte[2]; 680 int pad = 0; 681 byte[] padding; 682 /* 683 * The header includes a 2 byte number containing the size of the header, 684 * two magic numbers each 1 byte in size, the UDataInfo structure, and the 685 * copyright plus null terminator. Subject to change. 686 */ 687 headSize += 688 info.size 689 + BYTES_TAKEN_BY_HEADER_SIZE 690 + BYTES_TAKEN_BY_MAGIC1 691 + BYTES_TAKEN_BY_MAGIC2; 692 if (copyright != null && INCLUDE_COPYRIGHT) { 693 headSize += copyright.length() + 1; 694 } 695 if ((pad = pad16Bytes(headSize)) != 0) { 696 headSize += pad; 697 } 698 699 magics[0] = MAGIC1; 700 magics[1] = MAGIC2; 701 702 try { 703 // write the size of the header 704 out.write(shortToBytes(headSize)); 705 written += (shortToBytes(headSize)).length; 706 707 // write the two magic numbers 708 out.write(magics); 709 written += magics.length; 710 711 // write the UDataInfo structure 712 out.write(info.getByteArray()); 713 written += info.getByteArray().length; 714 715 // write the copyright and null terminating byte(s) if writing it 716 if (copyright != null && INCLUDE_COPYRIGHT) { 717 out.write((copyright + "\0").getBytes(CHARSET8)); 718 written += ((copyright + "\0").getBytes(CHARSET8)).length; 719 } 720 721 if (pad != 0) { 722 padding = new byte[pad]; 723 for (int i = 0; i < padding.length; i++) { 724 padding[i] = 0; 725 } 726 out.write(padding); 727 written += padding.length; 728 } 729 730 } catch (IOException e) { 731 printError(e.getMessage()); 732 e.printStackTrace(); 733 System.exit(1); 734 } 735 } 736 737 /** 738 * Write some information about the key string and then write a chunk of bytes which mirrors the 739 * SRBRoot->fkeys character buffer. This will be a list of null terminated strings. Each string 740 * pertains to a certain resource. This method also modifies the resources in 'resTop' by 741 * setting the keyStringOffset variable. The keyStringOffset variable is the number of bytes 742 * from the start of the key string that the resources key starts. For example: 743 * 744 * <p>In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table 745 * contains a string resource with the key "1.31." 746 * 747 * <p>If this were the whole of the locale data, the key string would be an encoded version of 748 * this: "Version\01.31\0" <br> 749 * <br> 750 * In UTF-16 encoding, each character will take 2 bytes. <br> 751 * keyStringOffset for the table object would be 0. <br> 752 * keyStringOffset for the string resource would be = "Version".length() + 2 = 16 753 * 754 * @param out The output stream to write this to. 755 * @param resTop The top of the resource tree whose keys shall be written 756 */ writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop)757 private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) { 758 String keyList = ""; 759 byte[] padding = null; 760 int padBytes = 0; 761 int end; 762 int root; 763 byte[] rootBytes; 764 int[] indexes = new int[URES_INDEX_TOP]; 765 byte[] indexBytes = new byte[URES_INDEX_TOP * 4]; 766 byte[] keyBytes; 767 int usedOffset; 768 int sizeOfIndexes; 769 int sizeOfIndexesAndKeys; 770 int tableID; 771 772 // set flag so that we know which resource is the top of the tree 773 resTop.isTop = true; 774 775 sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT; 776 777 usedOffset = sizeOfIndexes; 778 779 // Build the String of keys 780 keyList = buildKeyList(keyList, resTop, usedOffset); 781 782 sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length(); 783 usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys); 784 785 end = sizeOfIndexesAndKeys + resTop.sizeOfChildren; 786 787 // if it is not 16 byte aligned 788 if ((padBytes = pad32(sizeOfIndexesAndKeys)) != 0) { 789 padding = createPadding(padBytes); 790 if (padding != null) { 791 usedOffset += padding.length; 792 end += padding.length; 793 } 794 } 795 796 // build a set of 32 bits (in C this variable is called 'root' in reslist.c) 797 // the number of bytes included in the keyList, keyList padding, all the children 798 799 if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) { 800 tableID = (URES_TABLE32 << 28); 801 } else { 802 tableID = (URES_TABLE << 28); 803 } 804 root = (end >>> 2) | (tableID); 805 806 rootBytes = intToBytes(root); 807 808 end += resTop.size; 809 810 end += pad32(end); 811 812 indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP; 813 indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2; 814 indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2; 815 indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP]; 816 indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength; 817 818 indexBytes = intArrayToBytes(indexes); 819 820 try { 821 // write the "root" object 822 out.write(rootBytes); 823 written += rootBytes.length; 824 825 // write the indexes array 826 out.write(indexBytes); 827 written += indexBytes.length; 828 829 // write the keyList and padding if nessicary 830 keyBytes = keyList.getBytes(CHARSET8); 831 out.write(keyBytes); 832 written += keyBytes.length; 833 834 if (padding != null) { 835 out.write(padding); 836 written += padding.length; 837 } 838 } catch (IOException e) { 839 printError("Could not write key string to file. " + e.getMessage()); 840 System.exit(1); 841 } 842 843 return usedOffset; 844 } 845 846 /** 847 * Recursively go through the whole tree and continue to add to the keyList. As this is done, 848 * set the keyStringOffset, numChildren, sizeOfChildren, and size variables. 849 * 850 * @param keyList The current string of keys. 851 * @param resTop The resource whose keys shall be written to the keyList. 852 * @return 853 */ buildKeyList( String keyList, ICUResourceWriter.Resource resTop, int usedOffset)854 private static String buildKeyList( 855 String keyList, ICUResourceWriter.Resource resTop, int usedOffset) { 856 ICUResourceWriter.Resource current = resTop.first; 857 int x = 0; 858 859 // add this resources key to the list unless it is the top resource or doesn't have a key 860 if (!resTop.isTop && resTop.hasKey) { 861 // clean up quotes if any 862 if (resTop.name.indexOf("\"") >= 0) { 863 resTop.name = removeQuotes(resTop.name); 864 } 865 // set the keyStringOffset 866 resTop.keyStringOffset = usedOffset + (keyList.length() * BYTES_PER_UTF8_CHAR); 867 keyList += (resTop.name + "\0"); 868 } 869 870 // if it has children, call this method on them too 871 while (current != null) { 872 if (resTop instanceof ICUResourceWriter.ResourceArray 873 || resTop instanceof ICUResourceWriter.ResourceIntVector) { 874 current.hasKey = false; 875 } 876 877 keyList = buildKeyList(keyList, current, usedOffset); 878 x++; 879 880 // add the size of the current child to the parents sizeOfChildren 881 882 current = current.next; 883 } 884 885 // set the size of this object 886 resTop.setSize(); 887 888 resTop.numChildren = x; 889 return keyList; 890 } 891 892 /** 893 * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is 894 * upper byte. Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return: [0] = 895 * 0110 0000 or 0x60 [1] = 0110 1101 or 0x6D 896 */ shortToBytes(short x)897 private static byte[] shortToBytes(short x) { 898 byte[] b = new byte[2]; 899 b[1] = (byte) (x); // bitwise AND with the lower byte 900 b[0] = (byte) (x >>> 8); // shift four bits to the right and fill with zeros, and 901 // then bitwise and with the 902 // lower byte 903 return b; 904 } 905 906 /** Takes a 32 bit integer and returns an array of 4 bytes. */ intToBytes(int x)907 private static byte[] intToBytes(int x) { 908 byte[] b = new byte[4]; 909 b[3] = (byte) (x); // just the last byte 910 911 x = x >>> 8; // shift each byte over one spot. 912 b[2] = (byte) (x); // just the last byte 913 914 x = x >>> 8; // shift each byte over one spot. 915 b[1] = (byte) (x); // just the last byte 916 917 x = x >>> 8; // shift each byte over one spot. 918 b[0] = (byte) (x); // just the last byte 919 920 return b; 921 } 922 923 /** 924 * Takes an array of integers and returns a byte array of the memory representation. 925 * 926 * @param x 927 * @return 928 */ intArrayToBytes(int[] x)929 private static byte[] intArrayToBytes(int[] x) { 930 byte[] b = new byte[x.length * 4]; 931 byte[] temp; 932 int i, z; 933 934 for (i = 0; i < x.length; i++) { 935 temp = intToBytes(x[i]); 936 for (z = 0; z < 4; z++) { 937 b[(i * 4) + z] = temp[z]; 938 } 939 } 940 return b; 941 } 942 943 /** 944 * calculate the padding to make things align with 32 bits (aka 4 bytes) 945 * 946 * @param x 947 * @return 948 */ pad32(int x)949 private static int pad32(int x) { 950 return ((x % 4) == 0) ? 0 : (4 - (x % 4)); 951 } 952 pad16Bytes(int x)953 private static int pad16Bytes(int x) { 954 return ((x % 16) == 0) ? 0 : (16 - (x % 16)); 955 } 956 957 /** for printing errors. */ printError(String message)958 private static void printError(String message) { 959 960 System.err.println("LDML2ICUBinaryWriter : ERROR : " + message); 961 } 962 createPadding(int length)963 private static byte[] createPadding(int length) { 964 byte x = (byte) 0x00; 965 byte[] b = new byte[length]; 966 if (length == 0) { 967 return null; 968 } 969 for (int z = 0; z < b.length; z++) { 970 b[z] = x; 971 } 972 973 return b; 974 } 975 removeQuotes(String s)976 public static String removeQuotes(String s) { 977 String temp = s; 978 String temp2; 979 int x; 980 while (temp.indexOf("\"") >= 0) { 981 x = temp.indexOf("\""); 982 temp2 = temp.substring(0, x); 983 temp2 += temp.substring(x + 1, temp.length()); 984 temp = temp2; 985 } 986 987 return temp; 988 } 989 } 990