/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package ohos.global.icu.impl;

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;

import ohos.global.icu.impl.ICUBinary.Authenticate;
import ohos.global.icu.text.RuleBasedBreakIterator;

/**
 * <p>Internal class used for Rule Based Break Iterators.</p>
 * <p>This class provides access to the compiled break rule data, as
 * it is stored in a .brk file. Refer to the file common/rbbidata.h from
 * ICU4C for further details.</p>
 * @hide exposed on OHOS
 */
public final class RBBIDataWrapper {

    /**
     * A RBBI State Transition table, the form of the data used at run time in Java.
     * These can be created from stored ICU data, or built from rules.
     * The structure corresponds closely to struct RBBIStateTable in ICU4C.
     * @hide exposed on OHOS
     */
    static public class RBBIStateTable {
        /** Size in bytes of the serialized table header (four int fields). */
        private static final int HEADER_BYTES = 16;

        /**
         * Number of states (rows) in this table.
         */
        public int fNumStates;
        /**
         * Length of a table row in bytes. Note mismatch with table data, which is short[].
         */
        public int fRowLen;
        /**
         * Option Flags for this state table.
         */
        public int fFlags;
        /**
         * Reserved header field. It is read and written verbatim and takes part in
         * equality, but is not otherwise interpreted by this class.
         */
        public int fReserved;
        /**
         * Linear array of next state values, accessed as short[state, char_class]
         */
        public short[] fTable;

        public RBBIStateTable() {
        }

        /**
         * Reads a state table from the current position of {@code bytes}.
         *
         * @param bytes  buffer positioned at the start of the serialized table.
         * @param length total serialized length of the table in bytes, header included.
         * @return the table, or {@code null} when {@code length} is zero.
         * @throws IOException if {@code length} is non-zero but smaller than the
         *                     16-byte table header.
         */
        static RBBIStateTable get(ByteBuffer bytes, int length) throws IOException {
            if (length == 0) {
                return null;
            }
            if (length < HEADER_BYTES) {
                throw new IOException("Invalid RBBI state table length.");
            }
            RBBIStateTable table = new RBBIStateTable();
            table.fNumStates = bytes.getInt();
            table.fRowLen = bytes.getInt();
            table.fFlags = bytes.getInt();
            table.fReserved = bytes.getInt();
            int lengthOfShorts = length - HEADER_BYTES;  // length in bytes.
            // The low bit is the odd byte left over after the whole shorts;
            // presumably getShorts skips it (confirm against ICUBinary).
            table.fTable = ICUBinary.getShorts(bytes, lengthOfShorts / 2, lengthOfShorts & 1);
            return table;
        }

        /**
         * Writes this table: the four header ints, then {@code fRowLen * fNumStates / 2}
         * shorts from {@code fTable}, then zero bytes until the total is a multiple of 8.
         *
         * @param bytes destination stream.
         * @return the number of bytes written, including header and padding.
         * @throws IOException if the underlying stream fails.
         */
        public int put(DataOutputStream bytes) throws IOException {
            bytes.writeInt(fNumStates);
            bytes.writeInt(fRowLen);
            bytes.writeInt(fFlags);
            bytes.writeInt(fReserved);
            int tableLen = fRowLen * fNumStates / 2;     // fRowLen is bytes.
            for (int i = 0; i < tableLen; i++) {
                bytes.writeShort(fTable[i]);
            }
            // Total bytes written so far, including 16 for the header.
            int bytesWritten = HEADER_BYTES + fRowLen * fNumStates;
            while (bytesWritten % 8 != 0) {
                bytes.writeByte(0);
                ++bytesWritten;
            }
            return bytesWritten;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public boolean equals(Object other) {
            if (other == this) {
                return true;
            }
            if (!(other instanceof RBBIStateTable)) {
                return false;
            }
            RBBIStateTable otherST = (RBBIStateTable) other;
            return fNumStates == otherST.fNumStates
                    && fRowLen == otherST.fRowLen
                    && fFlags == otherST.fFlags
                    && fReserved == otherST.fReserved
                    && Arrays.equals(fTable, otherST.fTable);
        }

        /**
         * Hash code consistent with {@link #equals(Object)}: combines the same
         * five fields that equality compares.
         */
        @Override
        public int hashCode() {
            int result = fNumStates;
            result = 31 * result + fRowLen;
            result = 31 * result + fFlags;
            result = 31 * result + fReserved;
            result = 31 * result + Arrays.hashCode(fTable);
            return result;
        }
    }

    /**
     * Equals helper for state tables, including null handling.
     *
     * @return true if both are the same reference (including both null), or
     *         both are non-null and {@code left.equals(right)}.
     */
    static public boolean equals(RBBIStateTable left, RBBIStateTable right) {
        if (left == right) {
            return true;
        }
        if (left == null || right == null) {
            return false;
        }
        return left.equals(right);
    }


    //
    // These fields are the ready-to-use compiled rule data, as
    //   read from the file.
    //
    public RBBIDataHeader fHeader;

    public RBBIStateTable fFTable;

    public RBBIStateTable fRTable;

    public Trie2 fTrie;
    public String fRuleSource;
    public int[] fStatusTable;

    public static final int DATA_FORMAT = 0x42726b20;     // "Brk "
    public static final int FORMAT_VERSION = 0x05000000;  // 5.0.0.0

    /** Message used for every structural inconsistency found while reading the data. */
    private static final String CORRUPT_DATA_MESSAGE = "Break iterator Rule data corrupt";

    private static final class IsAcceptable implements Authenticate {
        /**
         * Accepts only data whose four version bytes, packed big-endian into an
         * int, equal {@link #FORMAT_VERSION}.
         */
        @Override
        public boolean isDataVersionAcceptable(byte version[]) {
            // Mask each byte so values >= 0x80 are not sign-extended into the
            // higher-order bytes of the packed version.
            int intVersion = ((version[0] & 0xff) << 24)
                    | ((version[1] & 0xff) << 16)
                    | ((version[2] & 0xff) << 8)
                    | (version[3] & 0xff);
            return intVersion == FORMAT_VERSION;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();

    //
    // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
    //   Used by the rule compiler when flattening the data.
    //
    public final static int DH_SIZE           = 20;
    public final static int DH_MAGIC          = 0;
    public final static int DH_FORMATVERSION  = 1;
    public final static int DH_LENGTH         = 2;
    public final static int DH_CATCOUNT       = 3;
    public final static int DH_FTABLE         = 4;
    public final static int DH_FTABLELEN      = 5;
    public final static int DH_RTABLE         = 6;
    public final static int DH_RTABLELEN      = 7;
    public final static int DH_TRIE           = 8;
    public final static int DH_TRIELEN        = 9;
    public final static int DH_RULESOURCE     = 10;
    public final static int DH_RULESOURCELEN  = 11;
    public final static int DH_STATUSTABLE    = 12;
    public final static int DH_STATUSTABLELEN = 13;


    // Index offsets to the fields in a state table row.
    //    Corresponds to struct RBBIStateTableRow in the C version.
    //
    /**
     * offset to the "accepting" field in a state table row.
     */
    public final static int ACCEPTING  = 0;
    /**
     * offset to the "lookahead" field in a state table row.
     */
    public final static int LOOKAHEAD  = 1;
    /**
     * offset to the "tagIndex" field in a state table row.
     */
    public final static int TAGIDX     = 2;
    /**
     * offset to the reserved field in a state table row.
     */
    public final static int RESERVED   = 3;
    /**
     * offset to the start of the next states array in a state table row.
     */
    public final static int NEXTSTATES = 4;

    //  Bit selectors for the "FLAGS" field of the state table header
    //     enum RBBIStateTableFlags in the C version.
    //
    public final static int RBBI_LOOKAHEAD_HARD_BREAK = 1;
    public final static int RBBI_BOF_REQUIRED         = 2;

    /**
     * Data Header.  A struct-like class with the fields from the RBBI data file header.
     * Not intended for public use, declared public for testing purposes only.
     * @hide exposed on OHOS
     */
    public final static class RBBIDataHeader {
        int         fMagic;         //  == 0xb1a0
        byte[]      fFormatVersion; //  For ICU 3.4 and later.
        int         fLength;        //  Total length in bytes of this RBBI Data,
                                    //      including all sections, not just the header.
        /**
         * the number of character categories.
         */
        public int  fCatCount;      //  Number of character categories.

        //
        //  Offsets and sizes of each of the subsections within the RBBI data.
        //  All offsets are bytes from the start of the RBBIDataHeader.
        //  All sizes are in bytes.
        //
        int         fFTable;         //  Offset to the forward state transition table.
        int         fFTableLen;
        int         fRTable;         //  Offset to the reverse state transition table.
        int         fRTableLen;
        int         fTrie;           //  Offset to Trie data for character categories
        int         fTrieLen;
        int         fRuleSource;     //  Offset to the source for the break
        int         fRuleSourceLen;  //    rules.  Stored UChar *.
        int         fStatusTable;    //  Offset to the table of rule status values
        int         fStatusTableLen;

        public RBBIDataHeader() {
            fMagic = 0;
            fFormatVersion = new byte[4];
        }
    }


    /**
     * RBBI State Table Indexing Function.  Given a state number, return the
     * array index of the start of the state table row for that state.
     * A row is {@code fHeader.fCatCount + 4} shorts long: the four fixed
     * fields followed by one next-state entry per character category.
     */
    public int getRowIndex(int state) {
        return state * (fHeader.fCatCount + 4);
    }

    RBBIDataWrapper() {
    }

    /**
     * Throws if the read position has already passed the start of the next section.
     *
     * @param pos           current offset from the start of the RBBI data header.
     * @param sectionOffset header-declared offset of the section about to be read.
     * @throws IOException if {@code pos} is beyond {@code sectionOffset}.
     */
    private static void checkSectionOffset(int pos, int sectionOffset) throws IOException {
        if (pos > sectionOffset) {
            throw new IOException(CORRUPT_DATA_MESSAGE);
        }
    }

    /**
     * Get an RBBIDataWrapper from a ByteBuffer holding a pre-compiled set
     * of RBBI rules.
     *
     * @param bytes buffer positioned at the start of the ICU data header.
     * @return the populated wrapper.
     * @throws IOException if the magic number or format version is wrong, or if
     *                     the section offsets in the header are inconsistent.
     */
    public static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
        RBBIDataWrapper wrapper = new RBBIDataWrapper();

        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);

        // Read in the RBBI data header...
        RBBIDataHeader header = new RBBIDataHeader();
        wrapper.fHeader = header;
        header.fMagic = bytes.getInt();
        for (int i = 0; i < header.fFormatVersion.length; i++) {
            header.fFormatVersion[i] = bytes.get();
        }
        header.fLength         = bytes.getInt();
        header.fCatCount       = bytes.getInt();
        header.fFTable         = bytes.getInt();
        header.fFTableLen      = bytes.getInt();
        header.fRTable         = bytes.getInt();
        header.fRTableLen      = bytes.getInt();
        header.fTrie           = bytes.getInt();
        header.fTrieLen        = bytes.getInt();
        header.fRuleSource     = bytes.getInt();
        header.fRuleSourceLen  = bytes.getInt();
        header.fStatusTable    = bytes.getInt();
        header.fStatusTableLen = bytes.getInt();
        ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];


        if (header.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(header.fFormatVersion)) {
            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
        }

        // Current position in the buffer, relative to the start of the RBBI data header.
        // The header has DH_SIZE fields, all int32_t (4 bytes).
        int pos = DH_SIZE * 4;

        //
        // Read in the Forward state transition table as an array of shorts.
        //

        //   Quick Sanity Check
        if (header.fFTable < pos || header.fFTable > header.fLength) {
            throw new IOException(CORRUPT_DATA_MESSAGE);
        }

        //    Skip over any padding preceding this table
        ICUBinary.skipBytes(bytes, header.fFTable - pos);
        pos = header.fFTable;

        wrapper.fFTable = RBBIStateTable.get(bytes, header.fFTableLen);
        pos += header.fFTableLen;

        //
        // Read in the Reverse state table
        //
        checkSectionOffset(pos, header.fRTable);

        // Skip over any padding in the file
        ICUBinary.skipBytes(bytes, header.fRTable - pos);
        pos = header.fRTable;

        // Create & fill the table itself.
        wrapper.fRTable = RBBIStateTable.get(bytes, header.fRTableLen);
        pos += header.fRTableLen;

        //
        // Unserialize the Character categories TRIE
        //     Because we can't be absolutely certain where the Trie deserialize will
        //     leave the buffer, leave position unchanged.
        //     The seek to the start of the next item following the TRIE will get us
        //     back in sync.
        //
        checkSectionOffset(pos, header.fTrie);
        ICUBinary.skipBytes(bytes, header.fTrie - pos);  // seek buffer from end of previous
        pos = header.fTrie;                              //   section to the start of the trie

        bytes.mark();                                    // Remember the position of the start of
                                                         //   the TRIE in the input.

        wrapper.fTrie = Trie2.createFromSerialized(bytes);  // Deserialize the TRIE, leaving buffer
                                                            //   at an unknown position, preceding the
                                                            //   padding between TRIE and following section.

        bytes.reset();                                   // Move buffer back to marked position at
                                                         //   the start of the serialized TRIE.  Now our
                                                         //   "pos" variable and the buffer are in
                                                         //   agreement.

        //
        // Read the Rule Status Table
        //
        checkSectionOffset(pos, header.fStatusTable);
        ICUBinary.skipBytes(bytes, header.fStatusTable - pos);
        pos = header.fStatusTable;
        wrapper.fStatusTable = ICUBinary.getInts(
                bytes, header.fStatusTableLen / 4, header.fStatusTableLen & 3);
        pos += header.fStatusTableLen;

        //
        // Put the break rule source into a String
        //
        checkSectionOffset(pos, header.fRuleSource);
        ICUBinary.skipBytes(bytes, header.fRuleSource - pos);
        pos = header.fRuleSource;
        wrapper.fRuleSource = ICUBinary.getString(
                bytes, header.fRuleSourceLen / 2, header.fRuleSourceLen & 1);

        if (RuleBasedBreakIterator.fDebugEnv != null && RuleBasedBreakIterator.fDebugEnv.indexOf("data") >= 0) {
            wrapper.dump(System.out);
        }
        return wrapper;
    }

    /**
     * Debug function to display the break iterator data.
     *
     * @throws NullPointerException if there is no forward state table.
     */
    public void dump(java.io.PrintStream out) {
        if (fFTable == null) {
            // There is no table. Fail early for testing purposes.
            throw new NullPointerException();
        }
        out.println("RBBI Data Wrapper dump ...");
        out.println();
        out.println("Forward State Table");
        dumpTable(out, fFTable);
        out.println("Reverse State Table");
        dumpTable(out, fRTable);

        dumpCharCategories(out);
        out.println("Source Rules: " + fRuleSource);

    }

    /**
     * Left-pads {@code text} with spaces to at least {@code width} characters.
     * Text already at or beyond the width is returned unchanged.
     */
    private static String padLeft(String text, int width) {
        StringBuilder dest = new StringBuilder(Math.max(width, text.length()));
        for (int i = text.length(); i < width; i++) {
            dest.append(' ');
        }
        dest.append(text);
        return dest.toString();
    }

    /** Fixed width int-to-string conversion (decimal, right-aligned, space padded). */
    static public String intToString(int n, int width) {
        return padLeft(Integer.toString(n), width);
    }

    /** Fixed width int-to-string conversion (hexadecimal, right-aligned, space padded). */
    static public String intToHexString(int n, int width) {
        return padLeft(Integer.toHexString(n), width);
    }

    /**
     * Dump a state table. This wrapper holds two of them: the forward table
     * and the reverse table.
     */
    private void dumpTable(java.io.PrintStream out, RBBIStateTable table) {
        if (table == null || table.fTable.length == 0) {
            out.println(" -- null -- ");
            return;
        }
        // Column headings: a 4-wide row number followed by 5-wide fields,
        // matching the widths used by dumpRow.
        StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
        for (int n = 0; n < fHeader.fCatCount; n++) {
            header.append(intToString(n, 5));
        }
        out.println(header.toString());
        for (int n = 0; n < header.length(); n++) {
            out.print("-");
        }
        out.println();
        for (int state = 0; state < table.fNumStates; state++) {
            dumpRow(out, table, state);
        }
        out.println();
    }

    /**
     * Dump (for debug) a single row of an RBBI state table.
     * Zero "accepting" and "lookahead" values are shown as blank fields.
     * @param out   destination for the formatted row
     * @param table the state table containing the row
     * @param state the state (row) number
     */
    private void dumpRow(java.io.PrintStream out, RBBIStateTable table, int state) {
        final String blankField = "     ";   // same width as a 5-wide numeric field
        StringBuilder dest = new StringBuilder(fHeader.fCatCount * 5 + 20);
        dest.append(intToString(state, 4));
        int row = getRowIndex(state);

        short accepting = table.fTable[row + ACCEPTING];
        dest.append(accepting != 0 ? intToString(accepting, 5) : blankField);

        short lookahead = table.fTable[row + LOOKAHEAD];
        dest.append(lookahead != 0 ? intToString(lookahead, 5) : blankField);

        dest.append(intToString(table.fTable[row + TAGIDX], 5));

        for (int col = 0; col < fHeader.fCatCount; col++) {
            dest.append(intToString(table.fTable[row + NEXTSTATES + col], 5));
        }

        out.println(dest);
    }

    /** Formats a code point range as " start" or " start-end", in hexadecimal. */
    private static String formatRange(int rangeStart, int rangeEnd) {
        String text = " " + Integer.toHexString(rangeStart);
        if (rangeEnd != rangeStart) {
            text += "-" + Integer.toHexString(rangeEnd);
        }
        return text;
    }

    /**
     * Dump (for debug) the code point ranges belonging to each character category.
     * Walks every code point, looks up its category in the trie and collects runs
     * of consecutive code points that share a category. Stops early, after
     * printing an error line, if a category is out of range.
     */
    private void dumpCharCategories(java.io.PrintStream out) {
        int n = fHeader.fCatCount;
        String[] catStrings = new String[n + 1];
        int rangeStart = 0;
        int rangeEnd = 0;
        int lastCat = -1;
        int[] lastNewline = new int[n + 1];

        Arrays.fill(catStrings, "");
        out.println("\nCharacter Categories");
        out.println("--------------------");
        for (int char32 = 0; char32 <= 0x10ffff; char32++) {
            int category = fTrie.get(char32);
            category &= ~0x4000;            // Mask off dictionary bit.
            if (category < 0 || category > fHeader.fCatCount) {
                out.println("Error, bad category " + Integer.toHexString(category) +
                        " for char " + Integer.toHexString(char32));
                break;
            }
            if (category == lastCat) {
                rangeEnd = char32;
                continue;
            }
            // Category changed: flush the run that just ended, if there was one.
            if (lastCat >= 0) {
                if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
                    lastNewline[lastCat] = catStrings[lastCat].length() + 10;
                    catStrings[lastCat] += "\n ";
                }
                catStrings[lastCat] += formatRange(rangeStart, rangeEnd);
            }
            lastCat = category;
            rangeStart = rangeEnd = char32;
        }
        // Flush the final run. lastCat is still -1 when the very first code
        // point had a bad category, in which case there is nothing to flush.
        if (lastCat >= 0) {
            catStrings[lastCat] += formatRange(rangeStart, rangeEnd);
        }

        for (int category = 0; category <= fHeader.fCatCount; category++) {
            out.println(intToString(category, 5) + " " + catStrings[category]);
        }
        out.println();
    }

}