1 /* 2 ******************************************************************************* 3 * Copyright (C) 2003-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 package org.unicode.cldr.icu; 9 10 import java.io.File; 11 import java.io.FileNotFoundException; 12 import java.io.FileOutputStream; 13 import java.io.IOException; 14 import java.util.Hashtable; 15 16 import org.unicode.cldr.util.CldrUtility; 17 18 import com.ibm.icu.lang.UCharacter; 19 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 20 import com.ibm.icu.text.UTF16; 21 22 /** 23 * The LDML2ICUBinaryWriter class is a set of methods which can be used 24 * to generate Binary (.res) files in the ICU Binary format. 25 * 26 * @author Brian Rower - June 2008 27 * 28 */ 29 public class LDML2ICUBinaryWriter { 30 /** 31 * This string is the copyright to be written into the file. 32 * In the C version, can be found in <I>icu4c_root</I>/source/common/unicode/uversion.h 33 */ 34 private static final String COPYRIGHT = CldrUtility.getCopyrightString(); 35 36 public static int written = 0; 37 38 /** 39 * Magic numbers!!!! 40 */ 41 private static final byte MAGIC1 = (byte) 0xda; 42 private static final byte MAGIC2 = 0x27; 43 44 private static boolean INCLUDE_COPYRIGHT = false; 45 /** 46 * The number of bytes it takes to write magic number 1. 47 */ 48 private static final short BYTES_TAKEN_BY_MAGIC1 = 1; 49 50 /** 51 * The number of bytes it takes to write magic number 2; 52 */ 53 private static final short BYTES_TAKEN_BY_MAGIC2 = 1; 54 55 /** 56 * The number of bytes that it takes to write the size of the header. 57 */ 58 private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2; 59 60 /** 61 * The charsets to be used when encoding strings. 62 */ 63 public static final String CHARSET8 = "UTF-8"; 64 public static final String CHARSET16 = "UTF-16BE"; 65 66 /** 67 * The number of bytes that each character takes up. This is dependant on the encoding (see CHARSET above). 68 */ 69 private static final int BYTES_PER_UTF8_CHAR = 1; 70 71 /** 72 * Numeric constants for special elements. 73 */ 74 private static final int SPECIAL_NONE = 0; 75 private static final int SPECIAL_COLLATIONS = 1; 76 private static final int SPECIAL_COLLATIONELEMENTS = 2; 77 private static final int SPECIAL_DEPENDENCY = 3; 78 private static final int SPECIAL_TRANSLITERATOR = 4; 79 80 /** 81 * Numeric constants for types of resource items. 82 * 83 * @see ures_getType 84 * @stable ICU 2.0 85 */ 86 87 // **************************** ENUM Below is ported from C. See ures.h *********************** 88 89 /** Resource type constant for "no resource". @stable ICU 2.6 */ 90 public static final int URES_NONE = -1; 91 92 /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */ 93 public static final int URES_STRING = 0; 94 95 /** Resource type constant for binary data. @stable ICU 2.6 */ 96 public static final int URES_BINARY = 1; 97 98 /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */ 99 public static final int URES_TABLE = 2; 100 101 /** 102 * Resource type constant for aliases; 103 * internally stores a string which identifies the actual resource 104 * storing the data (can be in a different resource bundle). 105 * Resolved internally before delivering the actual resource through the API. 106 * 107 * @stable ICU 2.6 108 */ 109 public static final int URES_ALIAS = 3; 110 111 /** 112 * Internal use only. 113 * Alternative resource type constant for tables of key-value pairs. 114 * Never returned by ures_getType(). 115 * 116 * @internal 117 */ 118 public static final int URES_TABLE32 = 4; 119 120 /** 121 * Resource type constant for a single 28-bit integer, interpreted as 122 * signed or unsigned by the ures_getInt() or ures_getUInt() function. 123 * 124 * @see ures_getInt 125 * @see ures_getUInt 126 * @stable ICU 2.6 127 */ 128 public static final int URES_INT = 7; 129 130 /** Resource type constant for arrays of resources. @stable ICU 2.6 */ 131 public static final int URES_ARRAY = 8; 132 133 /** 134 * Resource type constant for vectors of 32-bit integers. 135 * 136 * @see ures_getIntVector 137 * @stable ICU 2.6 138 */ 139 public static final int URES_INT_VECTOR = 14; 140 141 public static final int URES_LIMIT = 16; 142 143 /* 144 * The enum below is ported from C. See uresdata.h 145 * 146 * It is used as index references for the array which will be written. 147 */ 148 /* [0] contains URES_INDEX_TOP==the length of indexes[] */ 149 private static final int URES_INDEX_LENGTH = 0; 150 /* [1] contains the top of the strings, same as the bottom of resources, rounded up */ 151 private static final int URES_INDEX_STRINGS_TOP = 1; 152 /* [2] contains the top of all resources */ 153 private static final int URES_INDEX_RESOURCES_TOP = 2; 154 /* [3] contains the top of the bundle, in case it were ever different from [2] */ 155 private static final int URES_INDEX_BUNDLE_TOP = 3; 156 /* [4] max. length of any table */ 157 private static final int URES_INDEX_MAX_TABLE_LENGTH = 4; 158 /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */ 159 // private static final int URES_INDEX_ATTRIBUTES = 5; 160 /* This one is the length of the array */ 161 private static final int URES_INDEX_TOP = 6; 162 163 // must be set if writing transliteration 164 private static Hashtable<String, String> ruleStringsHash = null; 165 main()166 public static void main() { 167 168 } 169 170 /** 171 * This method is called upon the top of an ICUResourceWriter.Resource 172 * in order to write the whole Resource tree into binary format. 173 * 174 * @param resTop 175 * The top of the resource tree that you would like written to file. This 176 * object should be a ICUResourceWriter.ResourceTable. 177 * @param outDir 178 * A string pointing to the path of the output directory. 179 * @param outFile 180 * The name of the output file. If filename has an extension other than .res 181 * (ex: .txt) this method will strip that extention and replace with .res. 182 */ writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile)183 public static void writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile) { 184 String fileName = ""; 185 int usedOffset = 0; 186 String directoryPath = ""; 187 FileOutputStream out; 188 UDataInfo info; 189 byte[] dataFormat; 190 byte[] formatVersion; 191 byte[] dataVersion; 192 byte[] padding; 193 194 // Do some checks on the file name 195 // if it has a period in it...get rid of everything after the period 196 if (outFile.indexOf('.') > -1) { 197 fileName = outFile.substring(0, outFile.indexOf('.')); 198 if (fileName.length() == 0) { 199 printError(outFile + " is not a valid file name."); 200 System.exit(1); 201 } 202 fileName = fileName + ".res"; 203 } else { 204 fileName = outFile + ".res"; 205 } 206 // add the .res part to the file name 207 208 // do some checks on the directory path 209 // replace all backslashes with forward slashes 210 directoryPath = outDir.replace('\\', '/'); 211 212 // if the path does not end in a slash, then we'll add one 213 if (directoryPath.charAt(directoryPath.length() - 1) != '/') { 214 directoryPath = directoryPath + "/"; 215 } 216 217 // create UDataInfo 218 // Data format is "ResB" 219 dataFormat = new byte[4]; 220 dataFormat[0] = 0x52; // R 221 dataFormat[1] = 0x65; // e 222 dataFormat[2] = 0x73; // s 223 dataFormat[3] = 0x42; // B 224 225 // Format version is 1.2.0.0 226 formatVersion = new byte[4]; 227 formatVersion[0] = 1; 228 formatVersion[1] = 2; 229 formatVersion[2] = 0; 230 formatVersion[3] = 0; 231 232 // data version is 1.4.0.0 233 dataVersion = new byte[4]; 234 dataVersion[0] = 1; 235 dataVersion[1] = 4; 236 dataVersion[2] = 0; 237 dataVersion[3] = 0; 238 239 // now that the file and directory name are formatted, lets try to create an output stream 240 try { 241 System.out.println("Creating file: " + directoryPath + fileName); 242 File f = new File(directoryPath, fileName); 243 out = new FileOutputStream(f); 244 245 info = new UDataInfo(UDataInfo.getSize(), (short) 0, UDataInfo.BIGENDIAN, UDataInfo.ASCII_FAMILY, 246 UDataInfo.SIZE_OF_UCHAR, (byte) 0, dataFormat, formatVersion, dataVersion); 247 248 // this method goes through the tree and looks for a table named CollationElements or Collations, and adds 249 // the 250 // appropriate data to the tree 251 dealWithSpecialElements(resTop, outDir); 252 253 // before we do anything with the resources, sort them 254 resTop.sort(); 255 256 // call writeBinaryHeader. 257 writeBinaryHeader(out, info, COPYRIGHT); 258 259 usedOffset = writeKeyString(out, resTop); 260 261 // Call writeBinary on the top of the Resource tree 262 263 usedOffset = resTop.writeBinary(out, usedOffset); 264 padding = createPadding(pad32(usedOffset)); 265 if (padding != null) { 266 out.write(padding); 267 written += padding.length; 268 } 269 out.close(); 270 System.out.println("Finished writing binary."); 271 } catch (FileNotFoundException e) { 272 printError(directoryPath + fileName + " could not be opened, please ensure the correct path is given."); 273 e.printStackTrace(); 274 System.exit(1); 275 } catch (SecurityException e) { 276 printError("access denied: " + directoryPath + fileName); 277 e.printStackTrace(); 278 System.exit(1); 279 } catch (Exception e) { 280 printError(e.getMessage()); 281 System.exit(1); 282 } 283 } 284 getSpecialType(ICUResourceWriter.Resource res)285 private static int getSpecialType(ICUResourceWriter.Resource res) { 286 if (!res.hasKey) { 287 return SPECIAL_NONE; 288 } 289 290 if (res.name.equals("CollationElements") && res instanceof ICUResourceWriter.ResourceTable) { 291 return SPECIAL_COLLATIONELEMENTS; 292 } 293 294 if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) { 295 return SPECIAL_COLLATIONS; 296 } 297 298 if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) { 299 return SPECIAL_DEPENDENCY; 300 } 301 302 if (res instanceof ICUResourceWriter.ResourceProcess) { 303 if (((ICUResourceWriter.ResourceProcess) res).ext.equals(ICUResourceWriter.TRANSLITERATOR)) { 304 return SPECIAL_TRANSLITERATOR; 305 } 306 } 307 308 return SPECIAL_NONE; 309 } 310 311 /** 312 * 313 * Goes through the resource tree recursively and looks for a table named 314 * CollationElements, collations, dependency, or transliterator and adds the appropriate data 315 * 316 * @param top 317 * The top of the Resource Tree 318 */ dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir)319 private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) { 320 // if it's a table 321 if (top instanceof ICUResourceWriter.ResourceTable) { 322 // loop through all it's elements and check if they're anything specialCollationElements or Collation 323 ICUResourceWriter.Resource cur = top.first; 324 while (cur != null) { 325 switch (getSpecialType(cur)) { 326 case SPECIAL_COLLATIONELEMENTS: 327 addCollation(cur); 328 break; 329 case SPECIAL_COLLATIONS: 330 addCollationElements(cur); 331 break; 332 case SPECIAL_DEPENDENCY: 333 addDependency((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur, 334 outDir); 335 break; 336 case SPECIAL_TRANSLITERATOR: 337 addTransliteration((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur); 338 break; 339 case SPECIAL_NONE: 340 default: 341 dealWithSpecialElements(cur, outDir); 342 } 343 344 cur = cur.next; 345 } 346 } 347 // if it's not a table...don't do anything... 348 } 349 setRulesHash(Hashtable<String, String> hash)350 public static void setRulesHash(Hashtable<String, String> hash) { 351 ruleStringsHash = hash; 352 } 353 354 // Parallels the C function for parseTransliterator in parse.c of genrb addTransliteration(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess trans)355 private static void addTransliteration(ICUResourceWriter.ResourceTable parent, 356 ICUResourceWriter.ResourceProcess trans) { 357 if (ruleStringsHash == null) { 358 System.err.println("If you are processing transliteration, you must set the Rules Hashtable."); 359 System.exit(-1); 360 } 361 362 String dataString = ruleStringsHash.get(trans.val); 363 364 if (dataString == null) { 365 System.err.println("Could not find data for: " + trans.val); 366 System.exit(-1); 367 } 368 369 // strip out the unneeded stuff from the buffer (like comments and spaces and line breaks 370 dataString = stripRules(dataString); 371 372 // create a string resource containing the data and add it to the resource tree 373 // remove the ResourceProcess and add the String 374 375 ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString("Resource", dataString); 376 377 ICUResourceWriter.Resource current = parent.first; 378 379 // yes, we're using an address comparison below...because they should both be pointing the the same object when 380 // we find it. 381 if (current != trans) { 382 while (current != null && current.next != trans) { 383 current = current.next; 384 } 385 if (current != null) { 386 replacement.next = trans.next; 387 current.next = replacement; 388 } else { 389 System.err.println("An unexpected error has occured: Could not find Transliteration resource."); 390 System.exit(-1); 391 } 392 } else { 393 replacement.next = trans.next; 394 parent.first = replacement; 395 } 396 397 } 398 isUWhiteSpace(char c)399 private static boolean isUWhiteSpace(char c) { 400 return (c >= 0x0009 && c <= 0x2029 && (c <= 0x000D || c == 0x0020 || c == 0x0085 || 401 c == 0x200E || c == 0x200F || c >= 0x2028)); 402 } 403 isNewLine(char c)404 private static boolean isNewLine(char c) { 405 if (c == 0x000d || c == 0x000a) { 406 return true; 407 } 408 return false; 409 } 410 isPunctuation(char c)411 private static boolean isPunctuation(char c) { 412 int x = UCharacter.getType(c); 413 switch (x) { 414 case ECharacterCategory.CONNECTOR_PUNCTUATION: 415 case ECharacterCategory.DASH_PUNCTUATION: 416 case ECharacterCategory.END_PUNCTUATION: 417 case ECharacterCategory.FINAL_PUNCTUATION: 418 case ECharacterCategory.INITIAL_PUNCTUATION: 419 case ECharacterCategory.OTHER_PUNCTUATION: 420 case ECharacterCategory.START_PUNCTUATION: 421 return true; 422 default: 423 return false; 424 } 425 } 426 isControl(char c)427 private static boolean isControl(char c) { 428 int x = UCharacter.getType(c); 429 switch (x) { 430 case ECharacterCategory.CONTROL: 431 return true; 432 default: 433 return false; 434 } 435 } 436 437 // parallels the C++ function utrans_stripRules in rbt_pars.cpp in i18n project stripRules(String data)438 private static String stripRules(String data) { 439 String newData = ""; 440 int currentIndex = 0; 441 char curChar; 442 char curChar2 = '0'; 443 boolean needChar2 = false; 444 boolean quoted = false; 445 446 try { 447 448 while (currentIndex < data.length()) { 449 needChar2 = false; 450 curChar = data.charAt(currentIndex); 451 // if it's a quote, set the flag 452 if (curChar == '\'') { 453 quoted = !quoted; 454 } 455 // otherwise...if the quote flag is NOT set. 456 else if (!quoted) { 457 // IF comment... ignore comment lines ...starting with #....and until a carriage return or line feed 458 if (curChar == '#') { 459 // if the preceeding characters were whitepace or new lines, go back and get rid of them 460 461 while (newData.length() > 0 462 && (isNewLine(newData.charAt(newData.length() - 1)) || isUWhiteSpace(newData.charAt(newData 463 .length() - 1)))) { 464 if (newData.length() == 1) { 465 newData = ""; 466 } else { 467 newData = newData.substring(0, newData.length() - 2); 468 } 469 470 } 471 472 // move to the end of the line 473 while (!isNewLine(curChar) && currentIndex < data.length()) { 474 currentIndex++; 475 if (currentIndex < data.length()) { 476 curChar = data.charAt(currentIndex); 477 } 478 } 479 // grab the first character of this new line (no longer part of the comment 480 currentIndex++; 481 if (currentIndex < data.length()) { 482 curChar = data.charAt(currentIndex); 483 } 484 485 } else if (curChar == '\\') // OR if its an escape char //((UChar)0x005C) - \ 486 { 487 // skip over the \ and then skip any line breaks that may follow 488 do { 489 currentIndex++; 490 if (currentIndex < data.length()) { 491 curChar = data.charAt(currentIndex); 492 } 493 } while (isNewLine(curChar) && currentIndex < data.length()); 494 495 // if it's a u and there are 4 more characters after it 496 if (curChar == 'u' && (data.length() - currentIndex) >= 4) { 497 // convert it to a character from a codepoint (String)UTF16.valueOf(int) 498 499 String hexString = data.substring(currentIndex + 1, currentIndex + 5); 500 int codeNum = Integer.parseInt(hexString, 16); 501 String temp = UTF16.valueOf(codeNum); 502 char tempChar; 503 504 tempChar = temp.charAt(0); 505 506 // if its 0xFFFFFFFF 507 if (tempChar == 0xFFFFFFFF) { 508 System.err.println("Invalid character found while processing file."); 509 System.exit(-1); 510 } 511 // if NOT whitespace(isUWhiteSpace) && NOT a control character? && not punctuation 512 if (!isUWhiteSpace(tempChar) && !isPunctuation(tempChar) && !isControl(tempChar)) { 513 // set the current character to this character 514 curChar = tempChar; 515 currentIndex += 4; // the 4 numbers...will add one more for the u, already did one for 516 // the slash 517 if (temp.length() > 1) { 518 curChar2 = temp.charAt(1); 519 needChar2 = true; 520 } 521 } 522 523 } 524 525 } else if (curChar == '\'')// OR if it's a quote 526 { 527 quoted = !quoted; 528 } 529 } // end not quoted 530 531 if (isNewLine(curChar)) { 532 quoted = false; 533 // while we're not hitting the end of the string 534 while (currentIndex < data.length()) { 535 if (!isNewLine(curChar)) { 536 break; 537 } 538 currentIndex++; 539 if (currentIndex < data.length()) { 540 curChar = data.charAt(currentIndex); 541 } 542 } 543 continue; 544 } 545 546 // append the character to the new string, because we've decided it's ok 547 newData += curChar; 548 currentIndex++; 549 if (needChar2) { 550 newData += curChar2; 551 } 552 } // end loop 553 554 } catch (Exception e) { 555 System.err.println("Had a problem..."); 556 } 557 if (newData.length() > data.length()) { 558 return null; 559 } 560 return newData; 561 } 562 addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, String outDir)563 private static void addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep, 564 String outDir) { 565 String filename; 566 File f; 567 568 filename = outDir; 569 if (!(outDir.charAt(outDir.length() - 1) == '/' || outDir.charAt(outDir.length() - 1) == '\\')) { 570 filename += "/"; 571 } 572 573 filename += dep.val; 574 575 f = new File(filename); 576 if (!f.exists()) { 577 System.err.println("WARNING: Could not find dependancy: " + filename); 578 } 579 // create the %%DEPENDENCY array with a string containing the path, add it to the table. 580 ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray(); 581 a.name = "%%DEPENDENCY"; 582 ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val); 583 a.first = str; 584 dep.addAfter(a); 585 586 // Remove the ResourceProcess object and replace it with a ResourceString object. 587 ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString(dep.name, dep.val); 588 589 ICUResourceWriter.Resource current = parent.first; 590 591 // yes, we're using an address comparison below...because they should both be pointing the the same object when 592 // we find it. 593 while (current != null && current.next != dep) { 594 current = current.next; 595 } 596 replacement.next = dep.next; 597 current.next = replacement; 598 599 } 600 addCollationElements(ICUResourceWriter.Resource elementTable)601 private static void addCollationElements(ICUResourceWriter.Resource elementTable) { 602 // Element table name is "Collation" 603 // loops through sub tables of Collation and adds CollationBinary as nessisary 604 ICUResourceWriter.Resource cur = elementTable.first; 605 606 while (cur != null) { 607 addCollation(cur); 608 cur = cur.next; 609 } 610 } 611 addCollation(ICUResourceWriter.Resource element)612 private static void addCollation(ICUResourceWriter.Resource element) { 613 ICUResourceWriter.Resource cur = element.first; 614 615 while (cur != null) { 616 if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) { 617 ICUResourceWriter.ResourceString strElement = (ICUResourceWriter.ResourceString) cur; 618 619 if (strElement.name.equals("Sequence")) { 620 try { 621 // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val); 622 // TODO Generate proper binary data for Collator 623 /* 624 * currently CollatorWriter does not work properly 625 * Need to write something to generate proper bytes, 626 * bytes do not seem to exist at this time 627 * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower 628 * subdirectory of icu4j in the IBM local cvs 629 */ 630 // byte[] bytes = CollatorWriter.writeRBC(rbc); 631 // ICUResourceWriter.ResourceBinary b = new ICUResourceWriter.ResourceBinary(); 632 // b.data = bytes; 633 // b.name = "%%CollationBin"; 634 // element.addAfter(b); 635 636 } catch (Exception e) { 637 System.err.println("Could not create Collation Binary"); 638 } 639 } 640 } 641 cur = cur.next; 642 } 643 } 644 645 /** 646 * Write the header section of the file. This section of the file currently contains:<br> 647 * -A 2 byte number containing the length (in bytes) of the header.<br> 648 * -Two "magic numbers" each 1 byte in size.<br> 649 * -The UDataInfo structure 650 * -The null terminated copyright string (if it should be written) 651 * 652 * @param out 653 * @param info 654 * @param copyright 655 */ writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright)656 private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) { 657 short headSize = 0; 658 byte[] magics = new byte[2]; 659 int pad = 0; 660 byte[] padding; 661 /* 662 * The header includes a 2 byte number containing the size of the header, 663 * two magic numbers each 1 byte in size, the UDataInfo structure, and the 664 * copyright plus null terminator. Subject to change. 665 */ 666 headSize += info.size + BYTES_TAKEN_BY_HEADER_SIZE + BYTES_TAKEN_BY_MAGIC1 + BYTES_TAKEN_BY_MAGIC2; 667 if (copyright != null && INCLUDE_COPYRIGHT) { 668 headSize += copyright.length() + 1; 669 } 670 if ((pad = pad16Bytes(headSize)) != 0) { 671 headSize += pad; 672 } 673 674 magics[0] = MAGIC1; 675 magics[1] = MAGIC2; 676 677 try { 678 // write the size of the header 679 out.write(shortToBytes(headSize)); 680 written += (shortToBytes(headSize)).length; 681 682 // write the two magic numbers 683 out.write(magics); 684 written += magics.length; 685 686 // write the UDataInfo structure 687 out.write(info.getByteArray()); 688 written += info.getByteArray().length; 689 690 // write the copyright and null terminating byte(s) if writing it 691 if (copyright != null && INCLUDE_COPYRIGHT) { 692 out.write((copyright + "\0").getBytes(CHARSET8)); 693 written += ((copyright + "\0").getBytes(CHARSET8)).length; 694 695 } 696 697 if (pad != 0) { 698 padding = new byte[pad]; 699 for (int i = 0; i < padding.length; i++) { 700 padding[i] = 0; 701 } 702 out.write(padding); 703 written += padding.length; 704 } 705 706 } catch (IOException e) { 707 printError(e.getMessage()); 708 e.printStackTrace(); 709 System.exit(1); 710 } 711 } 712 713 /** 714 * Write some information about the key string and then write a chunk of bytes which mirrors the 715 * SRBRoot->fkeys character buffer. This will be a list of null 716 * terminated strings. Each string pertains to a certain resource. This method also modifies the resources in 717 * 'resTop' by setting the keyStringOffset variable. The keyStringOffset variable is the number of bytes from 718 * the start of the key string that the resources key starts. For example: 719 * 720 * <p> 721 * In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table contains a string resource 722 * with the key "1.31." 723 * </p> 724 * <p> 725 * If this were the whole of the locale data, the key string would be an encoded version of this: 726 * </p> 727 * 728 * "Version\01.31\0" 729 * 730 * <br> 731 * <br> 732 * In UTF-16 encoding, each character will take 2 bytes. <br> 733 * keyStringOffset for the table object would be 0. <br> 734 * keyStringOffset for the string resource would be = "Version".length() + 2 = 16 735 * 736 * 737 * @param out 738 * The output stream to write this to. 739 * @param resTop 740 * The top of the resource tree whose keys shall be written 741 */ writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop)742 private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) { 743 String keyList = ""; 744 byte[] padding = null; 745 int padBytes = 0; 746 int end; 747 int root; 748 byte[] rootBytes; 749 int[] indexes = new int[URES_INDEX_TOP]; 750 byte[] indexBytes = new byte[URES_INDEX_TOP * 4]; 751 byte[] keyBytes; 752 int usedOffset; 753 int sizeOfIndexes; 754 int sizeOfIndexesAndKeys; 755 int tableID; 756 757 // set flag so that we know which resource is the top of the tree 758 resTop.isTop = true; 759 760 sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT; 761 762 usedOffset = sizeOfIndexes; 763 764 // Build the String of keys 765 keyList = buildKeyList(keyList, resTop, usedOffset); 766 767 sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length(); 768 usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys); 769 770 end = sizeOfIndexesAndKeys + resTop.sizeOfChildren; 771 772 // if it is not 16 byte aligned 773 if ((padBytes = pad32(sizeOfIndexesAndKeys)) != 0) { 774 padding = createPadding(padBytes); 775 if (padding != null) { 776 usedOffset += padding.length; 777 end += padding.length; 778 } 779 780 } 781 782 // build a set of 32 bits (in C this variable is called 'root' in reslist.c) 783 // the number of bytes included in the keyList, keyList padding, all the children 784 785 if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) { 786 tableID = (URES_TABLE32 << 28); 787 } else { 788 tableID = (URES_TABLE << 28); 789 } 790 root = (end >>> 2) | (tableID); 791 792 rootBytes = intToBytes(root); 793 794 end += resTop.size; 795 796 end += pad32(end); 797 798 indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP; 799 indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2; 800 indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2; 801 indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP]; 802 indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength; 803 804 indexBytes = intArrayToBytes(indexes); 805 806 try { 807 // write the "root" object 808 out.write(rootBytes); 809 written += rootBytes.length; 810 811 // write the indexes array 812 out.write(indexBytes); 813 written += indexBytes.length; 814 815 // write the keyList and padding if nessicary 816 keyBytes = keyList.getBytes(CHARSET8); 817 out.write(keyBytes); 818 written += keyBytes.length; 819 820 if (padding != null) { 821 out.write(padding); 822 written += padding.length; 823 } 824 } catch (IOException e) { 825 printError("Could not write key string to file. " + e.getMessage()); 826 System.exit(1); 827 } 828 829 return usedOffset; 830 } 831 832 /** 833 * Recursively go through the whole tree and continue to add to the keyList. As this is done, 834 * set the keyStringOffset, numChildren, sizeOfChildren, and size variables. 835 * 836 * @param keyList 837 * The current string of keys. 838 * @param resTop 839 * The resource whose keys shall be written to the keyList. 840 * @return 841 */ buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset)842 private static String buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset) { 843 ICUResourceWriter.Resource current = resTop.first; 844 int x = 0; 845 846 // add this resources key to the list unless it is the top resource or doesn't have a key 847 if (!resTop.isTop && resTop.hasKey) { 848 // clean up quotes if any 849 if (resTop.name.indexOf("\"") >= 0) { 850 resTop.name = removeQuotes(resTop.name); 851 } 852 // set the keyStringOffset 853 resTop.keyStringOffset = usedOffset + (keyList.length() * BYTES_PER_UTF8_CHAR); 854 keyList += (resTop.name + "\0"); 855 856 } 857 858 // if it has children, call this method on them too 859 while (current != null) { 860 if (resTop instanceof ICUResourceWriter.ResourceArray 861 || resTop instanceof ICUResourceWriter.ResourceIntVector) { 862 current.hasKey = false; 863 } 864 865 keyList = buildKeyList(keyList, current, usedOffset); 866 x++; 867 868 // add the size of the current child to the parents sizeOfChildren 869 870 current = current.next; 871 } 872 873 // set the size of this object 874 resTop.setSize(); 875 876 resTop.numChildren = x; 877 return keyList; 878 } 879 880 /** 881 * Takes a 16 bit number and returns a two byte array. 0th element is lower byte, 1st element is upper byte. 882 * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return: 883 * [0] = 0110 0000 or 0x60 884 * [1] = 0110 1101 or 0x6D 885 */ shortToBytes(short x)886 private static byte[] shortToBytes(short x) { 887 byte[] b = new byte[2]; 888 b[1] = (byte) (x); // bitwise AND with the lower byte 889 b[0] = (byte) (x >>> 8); // shift four bits to the right and fill with zeros, and then bitwise and with the 890 // lower byte 891 return b; 892 } 893 894 /** 895 * Takes a 32 bit integer and returns an array of 4 bytes. 896 * 897 */ intToBytes(int x)898 private static byte[] intToBytes(int x) { 899 byte[] b = new byte[4]; 900 b[3] = (byte) (x); // just the last byte 901 902 x = x >>> 8; // shift each byte over one spot. 903 b[2] = (byte) (x); // just the last byte 904 905 x = x >>> 8; // shift each byte over one spot. 906 b[1] = (byte) (x); // just the last byte 907 908 x = x >>> 8; // shift each byte over one spot. 909 b[0] = (byte) (x); // just the last byte 910 911 return b; 912 } 913 914 /** 915 * Takes an array of integers and returns a byte array of the memory representation. 916 * 917 * @param x 918 * @return 919 */ intArrayToBytes(int[] x)920 private static byte[] intArrayToBytes(int[] x) { 921 byte[] b = new byte[x.length * 4]; 922 byte[] temp; 923 int i, z; 924 925 for (i = 0; i < x.length; i++) { 926 temp = intToBytes(x[i]); 927 for (z = 0; z < 4; z++) { 928 b[(i * 4) + z] = temp[z]; 929 } 930 } 931 return b; 932 } 933 934 /** 935 * calculate the padding to make things align with 32 bits (aka 4 bytes) 936 * 937 * @param x 938 * @return 939 */ pad32(int x)940 private static int pad32(int x) { 941 return ((x % 4) == 0) ? 0 : (4 - (x % 4)); 942 } 943 pad16Bytes(int x)944 private static int pad16Bytes(int x) { 945 return ((x % 16) == 0) ? 0 : (16 - (x % 16)); 946 } 947 948 /** 949 * for printing errors. 950 */ printError(String message)951 private static void printError(String message) { 952 953 System.err.println("LDML2ICUBinaryWriter : ERROR : " + message); 954 } 955 createPadding(int length)956 private static byte[] createPadding(int length) { 957 byte x = (byte) 0x00; 958 byte[] b = new byte[length]; 959 if (length == 0) { 960 return null; 961 } 962 for (int z = 0; z < b.length; z++) { 963 b[z] = x; 964 } 965 966 return b; 967 } 968 removeQuotes(String s)969 public static String removeQuotes(String s) { 970 String temp = s; 971 String temp2; 972 int x; 973 while (temp.indexOf("\"") >= 0) { 974 x = temp.indexOf("\""); 975 temp2 = temp.substring(0, x); 976 temp2 += temp.substring(x + 1, temp.length()); 977 temp = temp2; 978 } 979 980 return temp; 981 } 982 983 }