• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.DataOutputStream;
14 import java.io.IOException;
15 import java.nio.ByteBuffer;
16 import java.util.Arrays;
17 
18 import ohos.global.icu.impl.ICUBinary.Authenticate;
19 import ohos.global.icu.text.RuleBasedBreakIterator;
20 
/**
 * <p>Internal class used for Rule Based Break Iterators.</p>
 * <p>This class provides access to the compiled break rule data, as
 * it is stored in a .brk file. Refer to the file common/rbbidata.h from
 * ICU4C for further details.</p>
 * @hide exposed on OHOS
 */
28 public final class RBBIDataWrapper {
29 
30     /**
31      * A RBBI State Transition table, the form of the data used at run time in Java.
32      * These can be created from stored ICU data, or built from rules.
33      * The structure corresponds closely to struct RBBIStateTable in ICU4C.
34      * @hide exposed on OHOS
35      */
36     static public class RBBIStateTable {
37         /**
38          * Number of states (rows) in this table.
39          */
40         public int     fNumStates;
41         /**
42          * Length of a table row in bytes. Note mismatch with table data, which is short[].
43          */
44         public int     fRowLen;
45         /**
46          * Option Flags for this state table.
47          */
48         public int     fFlags;
49         /**
50          * Option Flags for this state table.
51          */
52         public int     fReserved;
53         /**
54          * Linear array of next state values, accessed as short[state, char_class]
55          */
56         public short[] fTable;
57 
RBBIStateTable()58         public RBBIStateTable() {
59         }
60 
get(ByteBuffer bytes, int length)61         static RBBIStateTable get(ByteBuffer bytes, int length) throws IOException {
62             if (length == 0) {
63                 return null;
64             }
65             if (length < 16) {
66                 throw new IOException("Invalid RBBI state table length.");
67             }
68             RBBIStateTable This = new RBBIStateTable();
69             This.fNumStates = bytes.getInt();
70             This.fRowLen    = bytes.getInt();
71             This.fFlags     = bytes.getInt();
72             This.fReserved  = bytes.getInt();
73             int lengthOfShorts = length - 16;   // length in bytes.
74             This.fTable     = ICUBinary.getShorts(bytes, lengthOfShorts / 2, lengthOfShorts & 1);
75             return This;
76         }
77 
put(DataOutputStream bytes)78         public int put(DataOutputStream bytes) throws IOException {
79             bytes.writeInt(fNumStates);
80             bytes.writeInt(fRowLen);
81             bytes.writeInt(fFlags);
82             bytes.writeInt(fReserved);
83             int tableLen = fRowLen * fNumStates / 2;  // fRowLen is bytes.
84             for (int i = 0; i < tableLen; i++) {
85                 bytes.writeShort(fTable[i]);
86             }
87             int bytesWritten = 16 + fRowLen * fNumStates;   // total bytes written,
88                                                             // including 16 for the header.
89             while (bytesWritten % 8 != 0) {
90                 bytes.writeByte(0);
91                 ++bytesWritten;
92             }
93             return bytesWritten;
94         }
95 
96         /**
97          * {@inheritDoc}
98          */
99         @Override
equals(Object other)100         public boolean equals (Object other) {
101             if (other == this) {
102                 return true;
103             }
104             if (!(other instanceof RBBIStateTable)) {
105                 return false;
106             }
107             RBBIStateTable otherST = (RBBIStateTable)other;
108             if (fNumStates != otherST.fNumStates) return false;
109             if (fRowLen    != otherST.fRowLen)    return false;
110             if (fFlags     != otherST.fFlags)     return false;
111             if (fReserved  != otherST.fReserved)  return false;
112             return Arrays.equals(fTable, otherST.fTable);
113         }
114     }
115 
116     /**
117      * Equals helper for state tables, including null handling.
118      */
equals(RBBIStateTable left, RBBIStateTable right)119     static public boolean equals(RBBIStateTable left, RBBIStateTable right) {
120         if (left == right) {
121             return true;
122         }
123         if (left == null || right == null) {
124             return false;
125         }
126         return left.equals(right);
127     }
128 
129 
130     //
131     // These fields are the ready-to-use compiled rule data, as
132     //   read from the file.
133     //
134     public RBBIDataHeader fHeader;
135 
136     public RBBIStateTable   fFTable;
137 
138     public RBBIStateTable   fRTable;
139 
140     public Trie2   fTrie;
141     public String  fRuleSource;
142     public int     fStatusTable[];
143 
144     public static final int DATA_FORMAT = 0x42726b20;     // "Brk "
145     public static final int FORMAT_VERSION = 0x05000000;  // 4.0.0.0
146 
147     private static final class IsAcceptable implements Authenticate {
148         @Override
isDataVersionAcceptable(byte version[])149         public boolean isDataVersionAcceptable(byte version[]) {
150             int intVersion = (version[0] << 24) + (version[1] << 16) + (version[2] << 8) + version[3];
151             return intVersion == FORMAT_VERSION;
152         }
153     }
154     private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
155 
156     //
157     // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
158     //   Used by the rule compiler when flattening the data.
159     //
160     public final static int    DH_SIZE           = 20;
161     public final static int    DH_MAGIC          = 0;
162     public final static int    DH_FORMATVERSION  = 1;
163     public final static int    DH_LENGTH         = 2;
164     public final static int    DH_CATCOUNT       = 3;
165     public final static int    DH_FTABLE         = 4;
166     public final static int    DH_FTABLELEN      = 5;
167     public final static int    DH_RTABLE         = 6;
168     public final static int    DH_RTABLELEN      = 7;
169     public final static int    DH_TRIE           = 8;
170     public final static int    DH_TRIELEN        = 9;
171     public final static int    DH_RULESOURCE     = 10;
172     public final static int    DH_RULESOURCELEN  = 11;
173     public final static int    DH_STATUSTABLE    = 12;
174     public final static int    DH_STATUSTABLELEN = 13;
175 
176 
177     // Index offsets to the fields in a state table row.
178     //    Corresponds to struct RBBIStateTableRow in the C version.
179     //
180     /**
181      * offset to the "accepting" field in a state table row.
182      */
183     public final static int      ACCEPTING  = 0;
184     /**
185      * offset to the "lookahead" field in a state table row.
186      */
187     public final static int      LOOKAHEAD  = 1;
188     /**
189      * offset to the "tagIndex" field in a state table row.
190      */
191     public final static int      TAGIDX     = 2;
192     /**
193      * offset to the reserved field in a state table row.
194      */
195     public final static int      RESERVED   = 3;
196     /**
197      * offset to the start of the next states array in a state table row.
198      */
199     public final static int      NEXTSTATES = 4;
200 
201     //  Bit selectors for the "FLAGS" field of the state table header
202     //     enum RBBIStateTableFlags in the C version.
203     //
204     public final static int      RBBI_LOOKAHEAD_HARD_BREAK = 1;
205     public final static int      RBBI_BOF_REQUIRED         = 2;
206 
207     /**
208      * Data Header.  A struct-like class with the fields from the RBBI data file header.
209      * Not intended for public use, declared public for testing purposes only.
210      * @hide exposed on OHOS
211      */
212     public final static class RBBIDataHeader {
213         int         fMagic;         //  == 0xbla0
214         byte[]      fFormatVersion; //  For ICU 3.4 and later.
215         int         fLength;        //  Total length in bytes of this RBBI Data,
216                                     //      including all sections, not just the header.
217         /**
218          * the number of character categories.
219          */
220         public int  fCatCount;      //  Number of character categories.
221 
222         //
223         //  Offsets and sizes of each of the subsections within the RBBI data.
224         //  All offsets are bytes from the start of the RBBIDataHeader.
225         //  All sizes are in bytes.
226         //
227         int         fFTable;         //  forward state transition table.
228         int         fFTableLen;
229         int         fRTable;         //  Offset to the reverse state transition table.
230         int         fRTableLen;
231         int         fTrie;           //  Offset to Trie data for character categories
232         int         fTrieLen;
233         int         fRuleSource;     //  Offset to the source for for the break
234         int         fRuleSourceLen;  //    rules.  Stored UChar *.
235         int         fStatusTable;    // Offset to the table of rule status values
236         int         fStatusTableLen;
237 
RBBIDataHeader()238         public RBBIDataHeader() {
239             fMagic = 0;
240             fFormatVersion = new byte[4];
241         }
242     }
243 
244 
245     /**
246      * RBBI State Table Indexing Function.  Given a state number, return the
247      * array index of the start of the state table row for that state.
248      */
getRowIndex(int state)249     public int getRowIndex(int state){
250         return state * (fHeader.fCatCount + 4);
251     }
252 
RBBIDataWrapper()253     RBBIDataWrapper() {
254     }
255 
256     /**
257      *  Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
258      *  of RBBI rules.
259      */
get(ByteBuffer bytes)260     public static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
261         RBBIDataWrapper This = new RBBIDataWrapper();
262 
263         ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
264 
265         // Read in the RBBI data header...
266         This.fHeader = new  RBBIDataHeader();
267         This.fHeader.fMagic          = bytes.getInt();
268         This.fHeader.fFormatVersion[0] = bytes.get();
269         This.fHeader.fFormatVersion[1] = bytes.get();
270         This.fHeader.fFormatVersion[2] = bytes.get();
271         This.fHeader.fFormatVersion[3] = bytes.get();
272         This.fHeader.fLength         = bytes.getInt();
273         This.fHeader.fCatCount       = bytes.getInt();
274         This.fHeader.fFTable         = bytes.getInt();
275         This.fHeader.fFTableLen      = bytes.getInt();
276         This.fHeader.fRTable         = bytes.getInt();
277         This.fHeader.fRTableLen      = bytes.getInt();
278         This.fHeader.fTrie           = bytes.getInt();
279         This.fHeader.fTrieLen        = bytes.getInt();
280         This.fHeader.fRuleSource     = bytes.getInt();
281         This.fHeader.fRuleSourceLen  = bytes.getInt();
282         This.fHeader.fStatusTable    = bytes.getInt();
283         This.fHeader.fStatusTableLen = bytes.getInt();
284         ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];
285 
286 
287         if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
288             throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
289         }
290 
291         // Current position in the buffer.
292         int pos = DH_SIZE * 4;     // offset of end of header, which has DH_SIZE fields, all int32_t (4 bytes)
293 
294         //
295         // Read in the Forward state transition table as an array of shorts.
296         //
297 
298         //   Quick Sanity Check
299         if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) {
300              throw new IOException("Break iterator Rule data corrupt");
301         }
302 
303         //    Skip over any padding preceding this table
304         ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos);
305         pos = This.fHeader.fFTable;
306 
307         This.fFTable = RBBIStateTable.get(bytes, This.fHeader.fFTableLen);
308         pos += This.fHeader.fFTableLen;
309 
310         //
311         // Read in the Reverse state table
312         //
313 
314         // Skip over any padding in the file
315         ICUBinary.skipBytes(bytes, This.fHeader.fRTable - pos);
316         pos = This.fHeader.fRTable;
317 
318         // Create & fill the table itself.
319         This.fRTable = RBBIStateTable.get(bytes, This.fHeader.fRTableLen);
320         pos += This.fHeader.fRTableLen;
321 
322         //
323         // Unserialize the Character categories TRIE
324         //     Because we can't be absolutely certain where the Trie deserialize will
325         //     leave the buffer, leave position unchanged.
326         //     The seek to the start of the next item following the TRIE will get us
327         //     back in sync.
328         //
329         ICUBinary.skipBytes(bytes, This.fHeader.fTrie - pos);  // seek buffer from end of
330         pos = This.fHeader.fTrie;               // previous section to the start of the trie
331 
332         bytes.mark();                           // Mark position of start of TRIE in the input
333                                                 //  and tell Java to keep the mark valid so long
334                                                 //  as we don't go more than 100 bytes past the
335                                                 //  past the end of the TRIE.
336 
337         This.fTrie = Trie2.createFromSerialized(bytes);  // Deserialize the TRIE, leaving buffer
338                                                 //  at an unknown position, preceding the
339                                                 //  padding between TRIE and following section.
340 
341         bytes.reset();                          // Move buffer back to marked position at
342                                                 //   the start of the serialized TRIE.  Now our
343                                                 //   "pos" variable and the buffer are in
344                                                 //   agreement.
345 
346         //
347         // Read the Rule Status Table
348         //
349         if (pos > This.fHeader.fStatusTable) {
350             throw new IOException("Break iterator Rule data corrupt");
351         }
352         ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos);
353         pos = This.fHeader.fStatusTable;
354         This.fStatusTable = ICUBinary.getInts(
355                 bytes, This.fHeader.fStatusTableLen / 4, This.fHeader.fStatusTableLen & 3);
356         pos += This.fHeader.fStatusTableLen;
357 
358         //
359         // Put the break rule source into a String
360         //
361         if (pos > This.fHeader.fRuleSource) {
362             throw new IOException("Break iterator Rule data corrupt");
363         }
364         ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos);
365         pos = This.fHeader.fRuleSource;
366         This.fRuleSource = ICUBinary.getString(
367                 bytes, This.fHeader.fRuleSourceLen / 2, This.fHeader.fRuleSourceLen & 1);
368 
369         if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) {
370             This.dump(System.out);
371         }
372         return This;
373     }
374 
375     /** Debug function to display the break iterator data. */
dump(java.io.PrintStream out)376     public void dump(java.io.PrintStream out) {
377         if (fFTable == null) {
378             // There is no table. Fail early for testing purposes.
379             throw new NullPointerException();
380         }
381         out.println("RBBI Data Wrapper dump ...");
382         out.println();
383         out.println("Forward State Table");
384         dumpTable(out, fFTable);
385         out.println("Reverse State Table");
386         dumpTable(out, fRTable);
387 
388         dumpCharCategories(out);
389         out.println("Source Rules: " + fRuleSource);
390 
391     }
392 
393     /** Fixed width int-to-string conversion. */
intToString(int n, int width)394     static public String intToString(int n, int width) {
395         StringBuilder  dest = new StringBuilder(width);
396         dest.append(n);
397         while (dest.length() < width) {
398            dest.insert(0, ' ');
399         }
400         return dest.toString();
401     }
402 
403     /** Fixed width int-to-string conversion. */
intToHexString(int n, int width)404     static public String intToHexString(int n, int width) {
405         StringBuilder  dest = new StringBuilder(width);
406         dest.append(Integer.toHexString(n));
407         while (dest.length() < width) {
408            dest.insert(0, ' ');
409         }
410         return dest.toString();
411     }
412 
413     /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
dumpTable(java.io.PrintStream out, RBBIStateTable table)414     private void dumpTable(java.io.PrintStream out, RBBIStateTable table) {
415         if (table == null || table.fTable.length == 0)   {
416             out.println("  -- null -- ");
417         } else {
418             int n;
419             int state;
420             StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
421             for (n=0; n<fHeader.fCatCount; n++) {
422                 header.append(intToString(n, 5));
423             }
424             out.println(header.toString());
425             for (n=0; n<header.length(); n++) {
426                 out.print("-");
427             }
428             out.println();
429             for (state=0; state < table.fNumStates; state++) {
430                 dumpRow(out, table, state);
431             }
432             out.println();
433         }
434     }
435 
436     /**
437      * Dump (for debug) a single row of an RBBI state table
438      * @param table
439      * @param state
440      */
dumpRow(java.io.PrintStream out, RBBIStateTable table, int state)441     private void dumpRow(java.io.PrintStream out, RBBIStateTable table, int   state) {
442         StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20);
443         dest.append(intToString(state, 4));
444         int row = getRowIndex(state);
445         if (table.fTable[row+ACCEPTING] != 0) {
446            dest.append(intToString(table.fTable[row+ACCEPTING], 5));
447         }else {
448             dest.append("     ");
449         }
450         if (table.fTable[row+LOOKAHEAD] != 0) {
451             dest.append(intToString(table.fTable[row+LOOKAHEAD], 5));
452         }else {
453             dest.append("     ");
454         }
455         dest.append(intToString(table.fTable[row+TAGIDX], 5));
456 
457         for (int col=0; col<fHeader.fCatCount; col++) {
458             dest.append(intToString(table.fTable[row+NEXTSTATES+col], 5));
459         }
460 
461         out.println(dest);
462     }
463 
dumpCharCategories(java.io.PrintStream out)464     private void dumpCharCategories(java.io.PrintStream out) {
465         int n = fHeader.fCatCount;
466         String   catStrings[] = new  String[n+1];
467         int      rangeStart = 0;
468         int      rangeEnd = 0;
469         int      lastCat = -1;
470         int      char32;
471         int      category;
472         int      lastNewline[] = new int[n+1];
473 
474         for (category = 0; category <= fHeader.fCatCount; category ++) {
475             catStrings[category] = "";
476         }
477         out.println("\nCharacter Categories");
478         out.println("--------------------");
479         for (char32 = 0; char32<=0x10ffff; char32++) {
480             category = fTrie.get(char32);
481             category &= ~0x4000;            // Mask off dictionary bit.
482             if (category < 0 || category > fHeader.fCatCount) {
483                 out.println("Error, bad category " + Integer.toHexString(category) +
484                         " for char " + Integer.toHexString(char32));
485                 break;
486             }
487             if (category == lastCat ) {
488                 rangeEnd = char32;
489             } else {
490                 if (lastCat >= 0) {
491                     if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
492                         lastNewline[lastCat] = catStrings[lastCat].length() + 10;
493                         catStrings[lastCat] += "\n       ";
494                     }
495 
496                     catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
497                     if (rangeEnd != rangeStart) {
498                         catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
499                     }
500                 }
501                 lastCat = category;
502                 rangeStart = rangeEnd = char32;
503             }
504         }
505         catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
506         if (rangeEnd != rangeStart) {
507             catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
508         }
509 
510         for (category = 0; category <= fHeader.fCatCount; category ++) {
511             out.println (intToString(category, 5) + "  " + catStrings[category]);
512         }
513         out.println();
514     }
515 
516 }
517