1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2005, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: rbbidata.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * RBBI data formats Includes 14 * 15 * Structs that describes the format of the Binary RBBI data, 16 * as it is stored in ICU's data file. 17 * 18 * RBBIDataWrapper - Instances of this class sit between the 19 * raw data structs and the RulesBasedBreakIterator objects 20 * that are created by applications. The wrapper class 21 * provides reference counting for the underlying data, 22 * and direct pointers to data that would not otherwise 23 * be accessible without ugly pointer arithmetic. The 24 * wrapper does not attempt to provide any higher level 25 * abstractions for the data itself. 26 * 27 * There will be only one instance of RBBIDataWrapper for any 28 * set of RBBI run time data being shared by instances 29 * (clones) of RulesBasedBreakIterator. 30 */ 31 32 #ifndef __RBBIDATA_H__ 33 #define __RBBIDATA_H__ 34 35 #include "unicode/utypes.h" 36 #include "unicode/udata.h" 37 #include "udataswp.h" 38 39 /** 40 * Swap RBBI data. See udataswp.h. 41 * @internal 42 */ 43 U_CAPI int32_t U_EXPORT2 44 ubrk_swap(const UDataSwapper *ds, 45 const void *inData, int32_t length, void *outData, 46 UErrorCode *pErrorCode); 47 48 #ifdef XP_CPLUSPLUS 49 50 #include "unicode/uobject.h" 51 #include "unicode/unistr.h" 52 #include "utrie.h" 53 54 U_NAMESPACE_BEGIN 55 56 /* 57 * The following structs map exactly onto the raw data from ICU common data file. 58 */ 59 struct RBBIDataHeader { 60 uint32_t fMagic; /* == 0xbla0 */ 61 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ 62 /* if there is one associated with this data. */ 63 /* (version originates in rbbi, is copied to UDataInfo) */ 64 /* For ICU 3.2 and earlier, this field was */ 65 /* uint32_t fVersion */ 66 /* with a value of 1. */ 67 uint32_t fLength; /* Total length in bytes of this RBBI Data, */ 68 /* including all sections, not just the header. */ 69 uint32_t fCatCount; /* Number of character categories. */ 70 71 /* */ 72 /* Offsets and sizes of each of the subsections within the RBBI data. */ 73 /* All offsets are bytes from the start of the RBBIDataHeader. */ 74 /* All sizes are in bytes. */ 75 /* */ 76 uint32_t fFTable; /* forward state transition table. */ 77 uint32_t fFTableLen; 78 uint32_t fRTable; /* Offset to the reverse state transition table. */ 79 uint32_t fRTableLen; 80 uint32_t fSFTable; /* safe point forward transition table */ 81 uint32_t fSFTableLen; 82 uint32_t fSRTable; /* safe point reverse transition table */ 83 uint32_t fSRTableLen; 84 uint32_t fTrie; /* Offset to Trie data for character categories */ 85 uint32_t fTrieLen; 86 uint32_t fRuleSource; /* Offset to the source for for the break */ 87 uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ 88 uint32_t fStatusTable; /* Offset to the table of rule status values */ 89 uint32_t fStatusTableLen; 90 91 uint32_t fReserved[6]; /* Reserved for expansion */ 92 93 }; 94 95 96 97 struct RBBIStateTableRow { 98 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ 99 /* Value 0: not an accepting state. */ 100 /* -1: Unconditional Accepting state. */ 101 /* positive: Look-ahead match has completed. */ 102 /* Actual boundary position happened earlier */ 103 /* Value here == fLookAhead in earlier */ 104 /* state, at actual boundary pos. */ 105 int16_t fLookAhead; /* Non-zero if this row is for a state that */ 106 /* corresponds to a '/' in the rule source. */ 107 /* Value is the same as the fAccepting */ 108 /* value for the rule (which will appear */ 109 /* in a different state. */ 110 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ 111 /* from a rule. Value is the index in the */ 112 /* StatusTable of the set of matching */ 113 /* tags (rule status values) */ 114 int16_t fReserved; 115 uint16_t fNextState[2]; /* Next State, indexed by char category. */ 116 /* Array Size is fNumCols from the */ 117 /* state table header. */ 118 /* CAUTION: see RBBITableBuilder::getTableSize() */ 119 /* before changing anything here. */ 120 }; 121 122 123 struct RBBIStateTable { 124 uint32_t fNumStates; /* Number of states. */ 125 uint32_t fRowLen; /* Length of a state table row, in bytes. */ 126 uint32_t fFlags; /* Option Flags for this state table */ 127 uint32_t fReserved; /* reserved */ 128 char fTableData[4]; /* First RBBIStateTableRow begins here. */ 129 /* (making it char[] simplifies ugly address */ 130 /* arithmetic for indexing variable length rows.) */ 131 }; 132 133 typedef enum { 134 RBBI_LOOKAHEAD_HARD_BREAK = 1, 135 RBBI_BOF_REQUIRED = 2 136 } RBBIStateTableFlags; 137 138 139 /* */ 140 /* The reference counting wrapper class */ 141 /* */ 142 class RBBIDataWrapper : public UMemory { 143 public: 144 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); 145 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); 146 ~RBBIDataWrapper(); 147 148 void init(const RBBIDataHeader *data, UErrorCode &status); 149 RBBIDataWrapper *addReference(); 150 void removeReference(); 151 UBool operator ==(const RBBIDataWrapper &other) const; 152 int32_t hashCode(); 153 const UnicodeString &getRuleSourceString() const; 154 #ifdef RBBI_DEBUG 155 void printData(); 156 void printTable(const char *heading, const RBBIStateTable *table); 157 #else 158 #define printData() 159 #define printTable(heading, table) 160 #endif 161 162 /* */ 163 /* Pointers to items within the data */ 164 /* */ 165 const RBBIDataHeader *fHeader; 166 const RBBIStateTable *fForwardTable; 167 const RBBIStateTable *fReverseTable; 168 const RBBIStateTable *fSafeFwdTable; 169 const RBBIStateTable *fSafeRevTable; 170 const UChar *fRuleSource; 171 const int32_t *fRuleStatusTable; 172 173 /* number of int32_t values in the rule status table. Used to sanity check indexing */ 174 int32_t fStatusMaxIdx; 175 176 UTrie fTrie; 177 178 private: 179 int32_t fRefCount; 180 UDataMemory *fUDataMem; 181 UnicodeString fRuleString; 182 183 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ 184 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ 185 }; 186 187 188 189 U_NAMESPACE_END 190 191 #endif /* C++ */ 192 193 #endif 194