/*
 *******************************************************************************
 * Copyright (C) 2003-2012, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package org.unicode.cldr.icu;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Hashtable;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.text.UTF16;

/**
 * The LDML2ICUBinaryWriter class is a set of static methods which can be used
 * to generate binary (.res) files in the ICU binary resource bundle format.
 *
 * <p>
 * All multi-byte values are written most-significant byte first (see {@link #shortToBytes(short)} and
 * {@link #intToBytes(int)}). The class keeps its state in static fields ({@link #written} and the
 * transliteration rules table), so it is not safe to use from several threads at once.
 * </p>
 *
 * @author Brian Rower - June 2008
 *
 */
public class LDML2ICUBinaryWriter {
    /**
     * This string is the copyright to be written into the file.
     * In the C version, can be found in <I>icu4c_root</I>/source/common/unicode/uversion.h
     */
    private static final String COPYRIGHT = " Copyright (C) 2012, International Business Machines Corporation and others. All Rights Reserved. ";

    /**
     * Running total of the bytes written by this class. It is never reset by this class.
     */
    public static int written = 0;

    /**
     * Magic numbers!!!!
     */
    private static final byte MAGIC1 = (byte) 0xda;
    private static final byte MAGIC2 = 0x27;

    /**
     * Whether the copyright string is written into (and counted in the size of) the header.
     */
    private static boolean INCLUDE_COPYRIGHT = false;

    /**
     * The number of bytes it takes to write magic number 1.
     */
    private static final short BYTES_TAKEN_BY_MAGIC1 = 1;

    /**
     * The number of bytes it takes to write magic number 2.
     */
    private static final short BYTES_TAKEN_BY_MAGIC2 = 1;

    /**
     * The number of bytes that it takes to write the size of the header.
     */
    private static final short BYTES_TAKEN_BY_HEADER_SIZE = 2;

    /**
     * The charsets to be used when encoding strings.
     */
    public static final String CHARSET8 = "UTF-8";
    public static final String CHARSET16 = "UTF-16BE";

    /**
     * The number of bytes that each key character is assumed to take up. This is dependent on the encoding
     * (see CHARSET8 above) and only holds for keys made of ASCII characters.
     */
    private static final int BYTES_PER_UTF8_CHAR = 1;

    /**
     * Numeric constants for special elements.
     */
    private static final int SPECIAL_NONE = 0;
    private static final int SPECIAL_COLLATIONS = 1;
    private static final int SPECIAL_COLLATIONELEMENTS = 2;
    private static final int SPECIAL_DEPENDENCY = 3;
    private static final int SPECIAL_TRANSLITERATOR = 4;

    /*
     * Numeric constants for types of resource items.
     *
     * See ures_getType (stable since ICU 2.0).
     */

    // **************************** ENUM Below is ported from C. See ures.h ***********************

    /** Resource type constant for "no resource". @stable ICU 2.6 */
    public static final int URES_NONE = -1;

    /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
    public static final int URES_STRING = 0;

    /** Resource type constant for binary data. @stable ICU 2.6 */
    public static final int URES_BINARY = 1;

    /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
    public static final int URES_TABLE = 2;

    /**
     * Resource type constant for aliases;
     * internally stores a string which identifies the actual resource
     * storing the data (can be in a different resource bundle).
     * Resolved internally before delivering the actual resource through the API.
     *
     * @stable ICU 2.6
     */
    public static final int URES_ALIAS = 3;

    /**
     * Internal use only.
     * Alternative resource type constant for tables of key-value pairs.
     * Never returned by ures_getType().
     *
     * @internal
     */
    public static final int URES_TABLE32 = 4;

    /**
     * Resource type constant for a single 28-bit integer, interpreted as
     * signed or unsigned by the ures_getInt() or ures_getUInt() function.
     *
     * @stable ICU 2.6
     */
    public static final int URES_INT = 7;

    /** Resource type constant for arrays of resources. @stable ICU 2.6 */
    public static final int URES_ARRAY = 8;

    /**
     * Resource type constant for vectors of 32-bit integers.
     *
     * @stable ICU 2.6
     */
    public static final int URES_INT_VECTOR = 14;

    public static final int URES_LIMIT = 16;

    /*
     * The enum below is ported from C. See uresdata.h
     *
     * It is used as index references for the array which will be written.
     */
    /* [0] contains URES_INDEX_TOP==the length of indexes[] */
    private static final int URES_INDEX_LENGTH = 0;
    /* [1] contains the top of the strings, same as the bottom of resources, rounded up */
    private static final int URES_INDEX_STRINGS_TOP = 1;
    /* [2] contains the top of all resources */
    private static final int URES_INDEX_RESOURCES_TOP = 2;
    /* [3] contains the top of the bundle, in case it were ever different from [2] */
    private static final int URES_INDEX_BUNDLE_TOP = 3;
    /* [4] max. length of any table */
    private static final int URES_INDEX_MAX_TABLE_LENGTH = 4;
    /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
    // private static final int URES_INDEX_ATTRIBUTES = 5;
    /* This one is the length of the array */
    private static final int URES_INDEX_TOP = 6;

    // must be set (see setRulesHash) if writing transliteration
    private static Hashtable<String, String> ruleStringsHash = null;

    /**
     * Placeholder that does nothing. It takes no arguments, so it is not a JVM entry point.
     */
    public static void main() {

    }

    /**
     * This method is called upon the top of an ICUResourceWriter.Resource
     * in order to write the whole Resource tree into binary format.
     * Any failure is reported on standard error and terminates the JVM with exit code 1.
     *
     * @param resTop
     *            The top of the resource tree that you would like written to file. This
     *            object should be a ICUResourceWriter.ResourceTable.
     * @param outDir
     *            A string pointing to the path of the output directory. An empty string means the
     *            current working directory.
     * @param outFile
     *            The name of the output file. Everything from the first period onwards (ex: .txt)
     *            is stripped and replaced with .res.
     */
    public static void writeBinaryFile(ICUResourceWriter.Resource resTop, String outDir, String outFile) {
        // Do some checks on the file name:
        // if it has a period in it...get rid of everything after the (first) period, then add ".res"
        String fileName;
        int firstPeriod = outFile.indexOf('.');
        if (firstPeriod > -1) {
            fileName = outFile.substring(0, firstPeriod);
            if (fileName.length() == 0) {
                printError(outFile + " is not a valid file name.");
                System.exit(1);
            }
            fileName = fileName + ".res";
        } else {
            fileName = outFile + ".res";
        }

        // do some checks on the directory path
        // replace all backslashes with forward slashes
        String directoryPath = outDir.replace('\\', '/');

        // if the path is not empty and does not end in a slash, then we'll add one
        if (directoryPath.length() > 0 && !directoryPath.endsWith("/")) {
            directoryPath = directoryPath + "/";
        }

        // create UDataInfo
        // Data format is "ResB"
        byte[] dataFormat = { 0x52, 0x65, 0x73, 0x42 };

        // Format version is 1.2.0.0
        byte[] formatVersion = { 1, 2, 0, 0 };

        // data version is 1.4.0.0
        byte[] dataVersion = { 1, 4, 0, 0 };

        // now that the file and directory name are formatted, lets try to create an output stream
        FileOutputStream out = null;
        try {
            System.out.println("Creating file: " + directoryPath + fileName);
            // An empty parent must not be handed to File(parent, child): that resolves against a
            // system-dependent default directory rather than the working directory.
            File f = (directoryPath.length() == 0) ? new File(fileName) : new File(directoryPath, fileName);
            out = new FileOutputStream(f);

            UDataInfo info = new UDataInfo(UDataInfo.getSize(), (short) 0, UDataInfo.BIGENDIAN,
                UDataInfo.ASCII_FAMILY, UDataInfo.SIZE_OF_UCHAR, (byte) 0, dataFormat, formatVersion, dataVersion);

            // this method goes through the tree and looks for the special elements (collation tables,
            // dependencies, transliterators) and adds the appropriate data to the tree
            dealWithSpecialElements(resTop, outDir);

            // before we do anything with the resources, sort them
            resTop.sort();

            writeBinaryHeader(out, info, COPYRIGHT);

            int usedOffset = writeKeyString(out, resTop);

            // Call writeBinary on the top of the Resource tree
            usedOffset = resTop.writeBinary(out, usedOffset);
            byte[] padding = createPadding(pad32(usedOffset));
            if (padding != null) {
                out.write(padding);
                written += padding.length;
            }
            out.close();
            System.out.println("Finished writing binary.");
        } catch (FileNotFoundException e) {
            printError(directoryPath + fileName + " could not be opened, please ensure the correct path is given.");
            e.printStackTrace();
            System.exit(1);
        } catch (SecurityException e) {
            printError("access denied: " + directoryPath + fileName);
            e.printStackTrace();
            System.exit(1);
        } catch (Exception e) {
            // e.getMessage() may be null (e.g. NullPointerException), so report the exception itself
            printError(e.toString());
            e.printStackTrace();
            System.exit(1);
        } finally {
            closeQuietly(out);
        }
    }

    /**
     * Closes the stream if it is non-null, ignoring any failure; closing an already closed stream is harmless.
     */
    private static void closeQuietly(FileOutputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // nothing useful can be done if closing fails
        }
    }

    /**
     * Classifies a resource as one of the SPECIAL_* kinds, based on its key name and its class.
     *
     * @param res
     *            The resource to classify.
     * @return one of the SPECIAL_* constants; SPECIAL_NONE if the resource has no key or is not special.
     */
    private static int getSpecialType(ICUResourceWriter.Resource res) {
        if (!res.hasKey) {
            return SPECIAL_NONE;
        }

        if (res.name.equals("CollationElements") && res instanceof ICUResourceWriter.ResourceTable) {
            return SPECIAL_COLLATIONELEMENTS;
        }

        if (res.name.equals("collations") && res instanceof ICUResourceWriter.ResourceTable) {
            return SPECIAL_COLLATIONS;
        }

        if (res.name.equals("depends") && res instanceof ICUResourceWriter.ResourceProcess) {
            return SPECIAL_DEPENDENCY;
        }

        if (res instanceof ICUResourceWriter.ResourceProcess) {
            if (((ICUResourceWriter.ResourceProcess) res).ext.equals(ICUResourceWriter.TRANSLITERATOR)) {
                return SPECIAL_TRANSLITERATOR;
            }
        }

        return SPECIAL_NONE;
    }

    /**
     *
     * Goes through the resource tree recursively and looks for a table named
     * CollationElements or collations, a "depends" process, or a transliterator process,
     * and adds the appropriate data.
     *
     * @param top
     *            The top of the Resource Tree
     * @param outDir
     *            The output directory, used to check that dependencies exist.
     */
    private static void dealWithSpecialElements(ICUResourceWriter.Resource top, String outDir) {
        // if it's not a table...don't do anything...
        if (!(top instanceof ICUResourceWriter.ResourceTable)) {
            return;
        }

        // loop through all its elements and check if they're anything special
        ICUResourceWriter.Resource cur = top.first;
        while (cur != null) {
            switch (getSpecialType(cur)) {
            case SPECIAL_COLLATIONELEMENTS:
                // a single collation table: handled by addCollation
                addCollation(cur);
                break;
            case SPECIAL_COLLATIONS:
                // a table of collation tables: addCollationElements calls addCollation on each one
                addCollationElements(cur);
                break;
            case SPECIAL_DEPENDENCY:
                addDependency((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur,
                    outDir);
                break;
            case SPECIAL_TRANSLITERATOR:
                addTransliteration((ICUResourceWriter.ResourceTable) top, (ICUResourceWriter.ResourceProcess) cur);
                break;
            case SPECIAL_NONE:
            default:
                dealWithSpecialElements(cur, outDir);
            }

            // cur itself may have been unlinked from the tree above, but its next pointer is still valid
            cur = cur.next;
        }
    }

    /**
     * Sets the table that maps a transliterator resource's value to its rule text.
     * Must be called before writing a tree that contains transliterator resources.
     *
     * @param hash
     *            The rules table.
     */
    public static void setRulesHash(Hashtable<String, String> hash) {
        ruleStringsHash = hash;
    }

    /**
     * Replaces a transliterator process resource with a string resource holding its stripped rules.
     * Parallels the C function for parseTransliterator in parse.c of genrb.
     * Terminates the JVM (exit code -1) if the rules table is unset, has no entry for the resource,
     * or the resource is not a child of the given parent.
     *
     * @param parent
     *            The table that contains the transliterator resource.
     * @param trans
     *            The transliterator resource to replace.
     */
    private static void addTransliteration(ICUResourceWriter.ResourceTable parent,
        ICUResourceWriter.ResourceProcess trans) {
        if (ruleStringsHash == null) {
            System.err.println("If you are processing transliteration, you must set the Rules Hashtable.");
            System.exit(-1);
        }

        String dataString = ruleStringsHash.get(trans.val);

        if (dataString == null) {
            System.err.println("Could not find data for: " + trans.val);
            System.exit(-1);
        }

        // strip out the unneeded stuff from the buffer (like comments and line breaks)
        dataString = stripRules(dataString);

        // create a string resource containing the data and add it to the resource tree:
        // remove the ResourceProcess and add the String
        ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString("Resource", dataString);

        // yes, we're using an address comparison below...because they should both be pointing to the same object
        // when we find it.
        if (parent.first == trans) {
            replacement.next = trans.next;
            parent.first = replacement;
            return;
        }

        ICUResourceWriter.Resource current = parent.first;
        while (current != null && current.next != trans) {
            current = current.next;
        }
        if (current != null) {
            replacement.next = trans.next;
            current.next = replacement;
        } else {
            System.err.println("An unexpected error has occured: Could not find Transliteration resource.");
            System.exit(-1);
        }
    }

    /**
     * Returns true for the characters treated as white space when stripping rules:
     * U+0009..U+000D, U+0020, U+0085, U+200E, U+200F, U+2028 and U+2029.
     */
    private static boolean isUWhiteSpace(char c) {
        return (c >= 0x0009 && c <= 0x2029 && (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
            c == 0x200E || c == 0x200F || c >= 0x2028));
    }

    /**
     * Returns true for carriage return (U+000D) and line feed (U+000A).
     */
    private static boolean isNewLine(char c) {
        return c == 0x000d || c == 0x000a;
    }

    /**
     * Returns true if the character's general category is one of the punctuation categories.
     */
    private static boolean isPunctuation(char c) {
        switch (UCharacter.getType(c)) {
        case ECharacterCategory.CONNECTOR_PUNCTUATION:
        case ECharacterCategory.DASH_PUNCTUATION:
        case ECharacterCategory.END_PUNCTUATION:
        case ECharacterCategory.FINAL_PUNCTUATION:
        case ECharacterCategory.INITIAL_PUNCTUATION:
        case ECharacterCategory.OTHER_PUNCTUATION:
        case ECharacterCategory.START_PUNCTUATION:
            return true;
        default:
            return false;
        }
    }

    /**
     * Returns true if the character's general category is CONTROL.
     */
    private static boolean isControl(char c) {
        return UCharacter.getType(c) == ECharacterCategory.CONTROL;
    }

    /**
     * Reads exactly four hexadecimal digits starting at {@code start}.
     * The caller must make sure that {@code start + 4 <= text.length()}.
     *
     * @return the value of the digits (0..0xFFFF), or -1 if any of the four characters is not a hex digit.
     */
    private static int parseHex4(String text, int start) {
        int value = 0;
        for (int i = start; i < start + 4; i++) {
            int digit = Character.digit(text.charAt(i), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Strips comments and line breaks out of transliterator rules.
     * Parallels the C++ function utrans_stripRules in rbt_pars.cpp in the i18n project.
     *
     * <ul>
     * <li>Text between single quotes is copied unchanged; a line break ends any open quote.</li>
     * <li>Outside quotes, a '#' comment is removed up to the end of its line, together with the white space
     * that precedes it.</li>
     * <li>Outside quotes, a backslash directly before a line break is removed along with the line break.</li>
     * <li>Outside quotes, a backslash-u escape with four hex digits is replaced by the character it names,
     * unless that character is white space, punctuation or a control character, in which case the escape is
     * kept as written. An escape whose digits are not hexadecimal terminates the JVM (exit code -1).</li>
     * <li>Any other backslash is kept; a backslash-quote pair is copied without opening or closing a quote.</li>
     * <li>All remaining line breaks are removed.</li>
     * </ul>
     *
     * @param data
     *            The rules to strip.
     * @return the stripped rules, never longer than {@code data}.
     */
    private static String stripRules(String data) {
        int length = data.length();
        StringBuilder stripped = new StringBuilder(length);
        boolean quoted = false;
        int index = 0;

        while (index < length) {
            char curChar = data.charAt(index);

            if (curChar == '\'') {
                quoted = !quoted;
            } else if (!quoted) {
                if (curChar == '#') {
                    // get rid of the white space (or new lines) that precede the comment, one character at a time
                    int keep = stripped.length();
                    while (keep > 0
                        && (isNewLine(stripped.charAt(keep - 1)) || isUWhiteSpace(stripped.charAt(keep - 1)))) {
                        keep--;
                    }
                    stripped.setLength(keep);

                    // skip the comment itself; the line break (if any) is examined by the next iteration,
                    // so whatever follows the comment goes through all the checks above
                    while (index < length && !isNewLine(data.charAt(index))) {
                        index++;
                    }
                    continue;
                }

                if (curChar == '\\' && index + 1 < length) {
                    char escaped = data.charAt(index + 1);

                    if (isNewLine(escaped)) {
                        // a backslash at the end of a line: since line breaks are stripped, drop the backslash too
                        index++;
                        continue;
                    }

                    if (escaped == '\'') {
                        // an escaped quote is copied as written and does not open or close a quoted section
                        stripped.append(curChar).append(escaped);
                        index += 2;
                        continue;
                    }

                    // 'u' plus four hex digits must fit: the digits are at index + 2 .. index + 5
                    if (escaped == 'u' && index + 5 < length) {
                        int codeUnit = parseHex4(data, index + 2);
                        if (codeUnit < 0) {
                            System.err.println("Invalid character found while processing file.");
                            System.exit(-1);
                        }
                        char decoded = (char) codeUnit;

                        // white space, punctuation and control characters were escaped for a reason: leave
                        // those escapes alone and only unescape everything else
                        if (!isUWhiteSpace(decoded) && !isPunctuation(decoded) && !isControl(decoded)) {
                            curChar = decoded;
                            // skip the backslash and the four digits; the 'u' is skipped by the increment below
                            index += 5;
                        }
                    }
                }
            }

            if (isNewLine(curChar)) {
                // a line break closes any open quote; skip this and all directly following line breaks
                quoted = false;
                while (index < length && isNewLine(data.charAt(index))) {
                    index++;
                }
                continue;
            }

            // append the character to the new string, because we've decided it's ok
            stripped.append(curChar);
            index++;
        }

        return stripped.toString();
    }

    /**
     * Replaces a "depends" process resource with a string resource of the same name and value, followed by a
     * %%DEPENDENCY array that contains the same value. Prints a warning if the file the dependency names does
     * not exist in the output directory. Terminates the JVM (exit code -1) if the dependency is not a child of
     * the given parent.
     *
     * @param parent
     *            The table that contains the dependency.
     * @param dep
     *            The dependency resource to replace.
     * @param outDir
     *            The output directory; an empty string means the current working directory.
     */
    private static void addDependency(ICUResourceWriter.ResourceTable parent, ICUResourceWriter.ResourceProcess dep,
        String outDir) {
        String filename = outDir;
        if (filename.length() > 0 && !(filename.endsWith("/") || filename.endsWith("\\"))) {
            filename += "/";
        }
        filename += dep.val;

        File f = new File(filename);
        if (!f.exists()) {
            System.err.println("WARNING: Could not find dependancy: " + filename);
        }

        // create the %%DEPENDENCY array with a string containing the path, add it to the table.
        ICUResourceWriter.ResourceArray a = new ICUResourceWriter.ResourceArray();
        a.name = "%%DEPENDENCY";
        ICUResourceWriter.ResourceString str = new ICUResourceWriter.ResourceString(null, dep.val);
        a.first = str;
        dep.addAfter(a);

        // Remove the ResourceProcess object and replace it with a ResourceString object.
        ICUResourceWriter.ResourceString replacement = new ICUResourceWriter.ResourceString(dep.name, dep.val);

        // yes, we're using an address comparison below...because they should both be pointing to the same object
        // when we find it.
        if (parent.first == dep) {
            // the dependency is the first child: there is no predecessor to relink
            replacement.next = dep.next;
            parent.first = replacement;
            return;
        }

        ICUResourceWriter.Resource current = parent.first;
        while (current != null && current.next != dep) {
            current = current.next;
        }
        if (current != null) {
            replacement.next = dep.next;
            current.next = replacement;
        } else {
            System.err.println("An unexpected error has occured: Could not find Dependency resource.");
            System.exit(-1);
        }
    }

    /**
     * Calls {@link #addCollation} on every child of the given "collations" table.
     *
     * @param elementTable
     *            The table whose children are the individual collation tables.
     */
    private static void addCollationElements(ICUResourceWriter.Resource elementTable) {
        // loops through sub tables of the collations table and adds CollationBinary as necessary
        ICUResourceWriter.Resource cur = elementTable.first;

        while (cur != null) {
            addCollation(cur);
            cur = cur.next;
        }
    }

    /**
     * Looks for the "Sequence" string of a collation table. Currently this changes nothing: generating the
     * binary collation data is not implemented (see the TODO below).
     *
     * @param element
     *            The collation table.
     */
    private static void addCollation(ICUResourceWriter.Resource element) {
        ICUResourceWriter.Resource cur = element.first;

        while (cur != null) {
            if (cur.hasKey && (cur instanceof ICUResourceWriter.ResourceString)) {
                ICUResourceWriter.ResourceString strElement = (ICUResourceWriter.ResourceString) cur;

                if (strElement.name.equals("Sequence")) {
                    // RuleBasedCollator rbc = new RuleBasedCollator(strElement.val);
                    // TODO Generate proper binary data for Collator
                    /*
                     * currently CollatorWriter does not work properly
                     * Need to write something to generate proper bytes,
                     * bytes do not seem to exist at this time
                     * CollatorWriter was not committed to the ICU4J trunk, it currently lives in the bdrower
                     * subdirectory of icu4j in the IBM local cvs
                     */
                    // byte[] bytes = CollatorWriter.writeRBC(rbc);
                    // ICUResourceWriter.ResourceBinary b = new ICUResourceWriter.ResourceBinary();
                    // b.data = bytes;
                    // b.name = "%%CollationBin";
                    // element.addAfter(b);
                }
            }
            cur = cur.next;
        }
    }

    /**
     * Write the header section of the file. This section of the file currently contains:<br>
     * -A 2 byte number containing the length (in bytes) of the header.<br>
     * -Two "magic numbers" each 1 byte in size.<br>
     * -The UDataInfo structure<br>
     * -The null terminated copyright string (only if INCLUDE_COPYRIGHT is set)<br>
     * -Zero bytes that pad the header to a multiple of 16 bytes
     *
     * <p>
     * An I/O failure is reported on standard error and terminates the JVM with exit code 1.
     * </p>
     *
     * @param out
     *            The stream to write the header to.
     * @param info
     *            The UDataInfo structure to write.
     * @param copyright
     *            The copyright string; may be null.
     */
    private static void writeBinaryHeader(FileOutputStream out, UDataInfo info, String copyright) {
        try {
            // the copyright plus null terminator, or null if it is not to be written
            byte[] copyrightBytes = null;
            if (copyright != null && INCLUDE_COPYRIGHT) {
                copyrightBytes = (copyright + "\0").getBytes(CHARSET8);
            }

            /*
             * The header includes a 2 byte number containing the size of the header,
             * two magic numbers each 1 byte in size, the UDataInfo structure, and the
             * copyright plus null terminator. Subject to change.
             */
            short headSize = 0;
            headSize += info.size + BYTES_TAKEN_BY_HEADER_SIZE + BYTES_TAKEN_BY_MAGIC1 + BYTES_TAKEN_BY_MAGIC2;
            if (copyrightBytes != null) {
                // count encoded bytes, not characters, so that the size also holds for non-ASCII text
                headSize += copyrightBytes.length;
            }
            int pad = pad16Bytes(headSize);
            headSize += pad;

            // write the size of the header
            byte[] sizeBytes = shortToBytes(headSize);
            out.write(sizeBytes);
            written += sizeBytes.length;

            // write the two magic numbers
            byte[] magics = { MAGIC1, MAGIC2 };
            out.write(magics);
            written += magics.length;

            // write the UDataInfo structure
            byte[] infoBytes = info.getByteArray();
            out.write(infoBytes);
            written += infoBytes.length;

            // write the copyright and null terminating byte if writing it
            if (copyrightBytes != null) {
                out.write(copyrightBytes);
                written += copyrightBytes.length;
            }

            if (pad != 0) {
                // a new array is already filled with zeros
                byte[] padding = new byte[pad];
                out.write(padding);
                written += padding.length;
            }

        } catch (IOException e) {
            printError(e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Write the root word, the indexes array and then a chunk of bytes which mirrors the
     * SRBRoot->fkeys character buffer. This will be a list of null
     * terminated strings. Each string pertains to a certain resource. This method also modifies the resources in
     * 'resTop' by setting the keyStringOffset variable. The keyStringOffset variable is the size of the root word
     * and indexes array plus the number of characters that precede the resource's key in the key string.
     * For example:
     *
     * <p>
     * In the 'en_PK' locale, you may have a Table resource with the key "Version." The Table contains a string resource
     * with the key "1.31."
     * </p>
     * <p>
     * If this were the whole of the locale data, the key string would be an encoded version of this:
     * </p>
     *
     * "Version\01.31\0"
     *
     * <br>
     * <br>
     * The keys are written in UTF-8 and are counted as 1 byte per character. <br>
     * With S = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT: <br>
     * keyStringOffset for the table object would be S. <br>
     * keyStringOffset for the string resource would be S + "Version".length() + 1 = S + 8
     *
     * <p>
     * An I/O failure is reported on standard error and terminates the JVM with exit code 1.
     * </p>
     *
     * @param out
     *            The output stream to write this to.
     * @param resTop
     *            The top of the resource tree whose keys shall be written. Must be a
     *            ICUResourceWriter.ResourceTable.
     * @return the offset to hand to the top resource's writeBinary method.
     */
    private static int writeKeyString(FileOutputStream out, ICUResourceWriter.Resource resTop) {
        // set flag so that we know which resource is the top of the tree
        resTop.isTop = true;

        // the root word plus the indexes array
        int sizeOfIndexes = (1 + URES_INDEX_TOP) * ICUResourceWriter.SIZE_OF_INT;

        // Build the String of keys
        String keyList = buildKeyList("", resTop, sizeOfIndexes);

        // NOTE(review): this counts characters, which only equals the number of bytes written below if
        // all keys are ASCII - confirm that keys can never contain anything else.
        int sizeOfIndexesAndKeys = sizeOfIndexes + keyList.length();
        int usedOffset = sizeOfIndexesAndKeys + pad32(sizeOfIndexesAndKeys);

        int end = sizeOfIndexesAndKeys + resTop.sizeOfChildren;

        // if the keys do not end on a 4 byte boundary, they are followed by padding
        byte[] padding = null;
        int padBytes = pad32(sizeOfIndexesAndKeys);
        if (padBytes != 0) {
            padding = createPadding(padBytes);
            if (padding != null) {
                // NOTE(review): usedOffset already includes this padding (see above), so it is counted twice
                // here. indexes[URES_INDEX_STRINGS_TOP] is not affected because of the shift below, but the
                // returned value is. Kept as is - confirm against ICUResourceWriter.Resource.writeBinary.
                usedOffset += padding.length;
                end += padding.length;
            }
        }

        // build a set of 32 bits (in C this variable is called 'root' in reslist.c):
        // the type of the top table and the offset (in 32-bit words) behind the keyList, the keyList padding
        // and all the children
        int tableID;
        if (((ICUResourceWriter.ResourceTable) resTop).is32Bit()) {
            tableID = (URES_TABLE32 << 28);
        } else {
            tableID = (URES_TABLE << 28);
        }
        int root = (end >>> 2) | (tableID);
        byte[] rootBytes = intToBytes(root);

        end += resTop.size;
        end += pad32(end);

        int[] indexes = new int[URES_INDEX_TOP];
        indexes[URES_INDEX_LENGTH] = URES_INDEX_TOP;
        indexes[URES_INDEX_STRINGS_TOP] = usedOffset >>> 2;
        indexes[URES_INDEX_RESOURCES_TOP] = (end) >> 2;
        indexes[URES_INDEX_BUNDLE_TOP] = indexes[URES_INDEX_RESOURCES_TOP];
        indexes[URES_INDEX_MAX_TABLE_LENGTH] = ICUResourceWriter.maxTableLength;
        byte[] indexBytes = intArrayToBytes(indexes);

        try {
            // write the "root" object
            out.write(rootBytes);
            written += rootBytes.length;

            // write the indexes array
            out.write(indexBytes);
            written += indexBytes.length;

            // write the keyList and padding if necessary
            byte[] keyBytes = keyList.getBytes(CHARSET8);
            out.write(keyBytes);
            written += keyBytes.length;

            if (padding != null) {
                out.write(padding);
                written += padding.length;
            }
        } catch (IOException e) {
            printError("Could not write key string to file. " + e.getMessage());
            System.exit(1);
        }

        return usedOffset;
    }

    /**
     * Recursively go through the whole tree and continue to add to the keyList. As this is done,
     * set the keyStringOffset and numChildren variables and call setSize() on every resource.
     * Children of arrays and int vectors are marked as having no key.
     *
     * @param keyList
     *            The current string of keys.
     * @param resTop
     *            The resource whose keys shall be written to the keyList.
     * @param usedOffset
     *            The number of bytes that precede the key string; added to every keyStringOffset.
     * @return the key string with the keys of resTop and all its descendants appended.
     */
    private static String buildKeyList(String keyList, ICUResourceWriter.Resource resTop, int usedOffset) {
        StringBuilder keys = new StringBuilder(keyList);
        appendKeys(keys, resTop, usedOffset);
        return keys.toString();
    }

    /**
     * Does the work of {@link #buildKeyList}, collecting the keys in a buffer so that the key string is not
     * copied once per resource.
     */
    private static void appendKeys(StringBuilder keys, ICUResourceWriter.Resource res, int usedOffset) {
        // add this resources key to the list unless it is the top resource or doesn't have a key
        if (!res.isTop && res.hasKey) {
            // clean up quotes if any
            res.name = removeQuotes(res.name);
            // set the keyStringOffset
            res.keyStringOffset = usedOffset + (keys.length() * BYTES_PER_UTF8_CHAR);
            keys.append(res.name).append('\0');
        }

        // elements of arrays and int vectors are not looked up by key
        boolean childrenHaveNoKeys = res instanceof ICUResourceWriter.ResourceArray
            || res instanceof ICUResourceWriter.ResourceIntVector;

        // if it has children, call this method on them too
        int childCount = 0;
        for (ICUResourceWriter.Resource child = res.first; child != null; child = child.next) {
            if (childrenHaveNoKeys) {
                child.hasKey = false;
            }
            appendKeys(keys, child, usedOffset);
            childCount++;
        }

        // set the size of this object (after its children, whose sizes have been set by the loop above)
        res.setSize();

        res.numChildren = childCount;
    }

    /**
     * Takes a 16 bit number and returns a two byte array, most significant byte first:
     * 0th element is the upper byte, 1st element is the lower byte.
     * Ex: x = 28,000. In binary: 0110 1101 0110 0000. This method will return:
     * [0] = 0110 1101 or 0x6D
     * [1] = 0110 0000 or 0x60
     */
    private static byte[] shortToBytes(short x) {
        byte[] b = new byte[2];
        b[1] = (byte) (x); // the cast keeps just the lower byte
        b[0] = (byte) (x >>> 8); // shift eight bits to the right, then the cast keeps what was the upper byte
        return b;
    }

    /**
     * Takes a 32 bit integer and returns an array of 4 bytes, most significant byte first.
     *
     */
    private static byte[] intToBytes(int x) {
        return new byte[] { (byte) (x >>> 24), (byte) (x >>> 16), (byte) (x >>> 8), (byte) x };
    }

    /**
     * Takes an array of integers and returns the bytes of all its elements, in order,
     * each one converted as by {@link #intToBytes(int)}.
     *
     * @param x
     *            The integers to convert.
     * @return an array of 4 * x.length bytes.
     */
    private static byte[] intArrayToBytes(int[] x) {
        byte[] b = new byte[x.length * 4];

        for (int i = 0; i < x.length; i++) {
            System.arraycopy(intToBytes(x[i]), 0, b, i * 4, 4);
        }
        return b;
    }

    /**
     * calculate the padding to make things align with 32 bits (aka 4 bytes)
     *
     * @param x
     *            A non-negative size in bytes.
     * @return the number of bytes (0 to 3) to add to x to reach a multiple of 4.
     */
    private static int pad32(int x) {
        return ((x % 4) == 0) ? 0 : (4 - (x % 4));
    }

    /**
     * calculate the padding to make things align with 16 bytes
     *
     * @param x
     *            A non-negative size in bytes.
     * @return the number of bytes (0 to 15) to add to x to reach a multiple of 16.
     */
    private static int pad16Bytes(int x) {
        return ((x % 16) == 0) ? 0 : (16 - (x % 16));
    }

    /**
     * for printing errors.
     */
    private static void printError(String message) {

        System.err.println("LDML2ICUBinaryWriter : ERROR : " + message);
    }

    /**
     * Creates an array of zero bytes.
     *
     * @param length
     *            The number of padding bytes needed.
     * @return an array of {@code length} zero bytes, or null if {@code length} is 0.
     */
    private static byte[] createPadding(int length) {
        if (length == 0) {
            return null;
        }
        // a new array is already filled with zeros
        return new byte[length];
    }

    /**
     * Removes every double quote character from the string.
     *
     * @param s
     *            The string to clean up.
     * @return s without any double quotes.
     */
    public static String removeQuotes(String s) {
        return s.replace("\"", "");
    }

}