1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 *************************************************************************** 5 * Copyright (C) 1999-2016 International Business Machines Corporation * 6 * and others. All rights reserved. * 7 *************************************************************************** 8 9 ********************************************************************** 10 * Date Name Description 11 * 10/22/99 alan Creation. 12 * 11/11/99 rgillam Complete port from Java. 13 ********************************************************************** 14 */ 15 16 #ifndef RBBI_H 17 #define RBBI_H 18 19 #include "unicode/utypes.h" 20 21 #if U_SHOW_CPLUSPLUS_API 22 23 /** 24 * \file 25 * \brief C++ API: Rule Based Break Iterator 26 */ 27 28 #if !UCONFIG_NO_BREAK_ITERATION 29 30 #include "unicode/brkiter.h" 31 #include "unicode/udata.h" 32 #include "unicode/parseerr.h" 33 #include "unicode/schriter.h" 34 35 struct UCPTrie; 36 37 U_NAMESPACE_BEGIN 38 39 /** @internal */ 40 class LanguageBreakEngine; 41 struct RBBIDataHeader; 42 class RBBIDataWrapper; 43 class UnhandledEngine; 44 class UStack; 45 46 /** 47 * 48 * A subclass of BreakIterator whose behavior is specified using a list of rules. 49 * <p>Instances of this class are most commonly created by the factory methods of 50 * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc., 51 * and then used via the abstract API in class BreakIterator</p> 52 * 53 * <p>See the ICU User Guide for information on Break Iterator Rules.</p> 54 * 55 * <p>This class is not intended to be subclassed.</p> 56 */ 57 class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator { 58 59 private: 60 /** 61 * The UText through which this BreakIterator accesses the text 62 * @internal (private) 63 */ 64 UText fText = UTEXT_INITIALIZER; 65 66 #ifndef U_HIDE_INTERNAL_API 67 public: 68 #endif /* U_HIDE_INTERNAL_API */ 69 /** 70 * The rule data for this BreakIterator instance. 71 * Not for general use; Public only for testing purposes. 72 * @internal 73 */ 74 RBBIDataWrapper *fData = nullptr; 75 76 private: 77 /** 78 * The saved error code associated with this break iterator. 79 * This is the value to be returned by copyErrorTo(). 80 */ 81 UErrorCode fErrorCode = U_ZERO_ERROR; 82 83 /** 84 * The current position of the iterator. Pinned, 0 < fPosition <= text.length. 85 * Never has the value UBRK_DONE (-1). 86 */ 87 int32_t fPosition = 0; 88 89 /** 90 * TODO: 91 */ 92 int32_t fRuleStatusIndex = 0; 93 94 /** 95 * Cache of previously determined boundary positions. 96 */ 97 class BreakCache; 98 BreakCache *fBreakCache = nullptr; 99 100 /** 101 * Cache of boundary positions within a region of text that has been 102 * sub-divided by dictionary based breaking. 103 */ 104 class DictionaryCache; 105 DictionaryCache *fDictionaryCache = nullptr; 106 107 /** 108 * 109 * If present, UStack of LanguageBreakEngine objects that might handle 110 * dictionary characters. Searched from top to bottom to find an object to 111 * handle a given character. 112 * @internal (private) 113 */ 114 UStack *fLanguageBreakEngines = nullptr; 115 116 /** 117 * 118 * If present, the special LanguageBreakEngine used for handling 119 * characters that are in the dictionary set, but not handled by any 120 * LanguageBreakEngine. 121 * @internal (private) 122 */ 123 UnhandledEngine *fUnhandledBreakEngine = nullptr; 124 125 /** 126 * Counter for the number of characters encountered with the "dictionary" 127 * flag set. 128 * @internal (private) 129 */ 130 uint32_t fDictionaryCharCount = 0; 131 132 /** 133 * A character iterator that refers to the same text as the UText, above. 134 * Only included for compatibility with old API, which was based on CharacterIterators. 135 * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. 136 */ 137 CharacterIterator *fCharIter = &fSCharIter; 138 139 /** 140 * When the input text is provided by a UnicodeString, this will point to 141 * a characterIterator that wraps that data. Needed only for the 142 * implementation of getText(), a backwards compatibility issue. 143 */ 144 UCharCharacterIterator fSCharIter {u"", 0}; 145 146 /** 147 * True when iteration has run off the end, and iterator functions should return UBRK_DONE. 148 */ 149 bool fDone = false; 150 151 /** 152 * Array of look-ahead tentative results. 153 */ 154 int32_t *fLookAheadMatches = nullptr; 155 156 /** 157 * A flag to indicate if phrase based breaking is enabled. 158 */ 159 UBool fIsPhraseBreaking = false; 160 161 //======================================================================= 162 // constructors 163 //======================================================================= 164 165 /** 166 * Constructor from a flattened set of RBBI data in malloced memory. 167 * RulesBasedBreakIterators built from a custom set of rules 168 * are created via this constructor; the rules are compiled 169 * into memory, then the break iterator is constructed here. 170 * 171 * The break iterator adopts the memory, and will 172 * free it when done. 173 * @internal (private) 174 */ 175 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 176 177 /** 178 * This constructor uses the udata interface to create a BreakIterator 179 * whose internal tables live in a memory-mapped file. "image" is an 180 * ICU UDataMemory handle for the pre-compiled break iterator tables. 181 * @param image handle to the memory image for the break iterator data. 182 * Ownership of the UDataMemory handle passes to the Break Iterator, 183 * which will be responsible for closing it when it is no longer needed. 184 * @param status Information on any errors encountered. 185 * @param isPhraseBreaking true if phrase based breaking is required, otherwise false. 186 * @see udata_open 187 * @see #getBinaryRules 188 * @internal (private) 189 */ 190 RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status); 191 192 /** @internal */ 193 friend class RBBIRuleBuilder; 194 /** @internal */ 195 friend class BreakIterator; 196 197 /** 198 * Default constructor with an error code parameter. 199 * Aside from error handling, otherwise identical to the default constructor. 200 * Internally, handles common initialization for other constructors. 201 * @internal (private) 202 */ 203 RuleBasedBreakIterator(UErrorCode *status); 204 205 public: 206 207 /** Default constructor. Creates an empty shell of an iterator, with no 208 * rules or text to iterate over. Object can subsequently be assigned to, 209 * but is otherwise unusable. 210 * @stable ICU 2.2 211 */ 212 RuleBasedBreakIterator(); 213 214 /** 215 * Copy constructor. Will produce a break iterator with the same behavior, 216 * and which iterates over the same text, as the one passed in. 217 * @param that The RuleBasedBreakIterator passed to be copied 218 * @stable ICU 2.0 219 */ 220 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 221 222 /** 223 * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. 224 * @param rules The break rules to be used. 225 * @param parseError In the event of a syntax error in the rules, provides the location 226 * within the rules of the problem. 227 * @param status Information on any errors encountered. 228 * @stable ICU 2.2 229 */ 230 RuleBasedBreakIterator( const UnicodeString &rules, 231 UParseError &parseError, 232 UErrorCode &status); 233 234 /** 235 * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. 236 * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). 237 * Construction of a break iterator in this way is substantially faster than 238 * construction from source rules. 239 * 240 * Ownership of the storage containing the compiled rules remains with the 241 * caller of this function. The compiled rules must not be modified or 242 * deleted during the life of the break iterator. 243 * 244 * The compiled rules are not compatible across different major versions of ICU. 245 * The compiled rules are compatible only between machines with the same 246 * byte ordering (little or big endian) and the same base character set family 247 * (ASCII or EBCDIC). 248 * 249 * @see #getBinaryRules 250 * @param compiledRules A pointer to the compiled break rules to be used. 251 * @param ruleLength The length of the compiled break rules, in bytes. This 252 * corresponds to the length value produced by getBinaryRules(). 253 * @param status Information on any errors encountered, including invalid 254 * binary rules. 255 * @stable ICU 4.8 256 */ 257 RuleBasedBreakIterator(const uint8_t *compiledRules, 258 uint32_t ruleLength, 259 UErrorCode &status); 260 261 /** 262 * This constructor uses the udata interface to create a BreakIterator 263 * whose internal tables live in a memory-mapped file. "image" is an 264 * ICU UDataMemory handle for the pre-compiled break iterator tables. 265 * @param image handle to the memory image for the break iterator data. 266 * Ownership of the UDataMemory handle passes to the Break Iterator, 267 * which will be responsible for closing it when it is no longer needed. 268 * @param status Information on any errors encountered. 269 * @see udata_open 270 * @see #getBinaryRules 271 * @stable ICU 2.8 272 */ 273 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 274 275 /** 276 * Destructor 277 * @stable ICU 2.0 278 */ 279 virtual ~RuleBasedBreakIterator(); 280 281 /** 282 * Assignment operator. Sets this iterator to have the same behavior, 283 * and iterate over the same text, as the one passed in. 284 * @param that The RuleBasedBreakItertor passed in 285 * @return the newly created RuleBasedBreakIterator 286 * @stable ICU 2.0 287 */ 288 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 289 290 /** 291 * Equality operator. Returns true if both BreakIterators are of the 292 * same class, have the same behavior, and iterate over the same text. 293 * @param that The BreakIterator to be compared for equality 294 * @return true if both BreakIterators are of the 295 * same class, have the same behavior, and iterate over the same text. 296 * @stable ICU 2.0 297 */ 298 virtual bool operator==(const BreakIterator& that) const override; 299 300 /** 301 * Not-equal operator. If operator== returns true, this returns false, 302 * and vice versa. 303 * @param that The BreakIterator to be compared for inequality 304 * @return true if both BreakIterators are not same. 305 * @stable ICU 2.0 306 */ 307 inline bool operator!=(const BreakIterator& that) const { 308 return !operator==(that); 309 } 310 311 /** 312 * Returns a newly-constructed RuleBasedBreakIterator with the same 313 * behavior, and iterating over the same text, as this one. 314 * Differs from the copy constructor in that it is polymorphic, and 315 * will correctly clone (copy) a derived class. 316 * clone() is thread safe. Multiple threads may simultaneously 317 * clone the same source break iterator. 318 * @return a newly-constructed RuleBasedBreakIterator 319 * @stable ICU 2.0 320 */ 321 virtual RuleBasedBreakIterator* clone() const override; 322 323 /** 324 * Compute a hash code for this BreakIterator 325 * @return A hash code 326 * @stable ICU 2.0 327 */ 328 virtual int32_t hashCode(void) const; 329 330 /** 331 * Returns the description used to create this iterator 332 * @return the description used to create this iterator 333 * @stable ICU 2.0 334 */ 335 virtual const UnicodeString& getRules(void) const; 336 337 //======================================================================= 338 // BreakIterator overrides 339 //======================================================================= 340 341 /** 342 * <p> 343 * Return a CharacterIterator over the text being analyzed. 344 * The returned character iterator is owned by the break iterator, and must 345 * not be deleted by the caller. Repeated calls to this function may 346 * return the same CharacterIterator. 347 * </p> 348 * <p> 349 * The returned character iterator must not be used concurrently with 350 * the break iterator. If concurrent operation is needed, clone the 351 * returned character iterator first and operate on the clone. 352 * </p> 353 * <p> 354 * When the break iterator is operating on text supplied via a UText, 355 * this function will fail, returning a CharacterIterator containing no text. 356 * The function getUText() provides similar functionality, 357 * is reliable, and is more efficient. 358 * </p> 359 * 360 * TODO: deprecate this function? 361 * 362 * @return An iterator over the text being analyzed. 363 * @stable ICU 2.0 364 */ 365 virtual CharacterIterator& getText(void) const override; 366 367 368 /** 369 * Get a UText for the text being analyzed. 370 * The returned UText is a shallow clone of the UText used internally 371 * by the break iterator implementation. It can safely be used to 372 * access the text without impacting any break iterator operations, 373 * but the underlying text itself must not be altered. 374 * 375 * @param fillIn A UText to be filled in. If nullptr, a new UText will be 376 * allocated to hold the result. 377 * @param status receives any error codes. 378 * @return The current UText for this break iterator. If an input 379 * UText was provided, it will always be returned. 380 * @stable ICU 3.4 381 */ 382 virtual UText *getUText(UText *fillIn, UErrorCode &status) const override; 383 384 /** 385 * Set the iterator to analyze a new piece of text. This function resets 386 * the current iteration position to the beginning of the text. 387 * @param newText An iterator over the text to analyze. The BreakIterator 388 * takes ownership of the character iterator. The caller MUST NOT delete it! 389 * @stable ICU 2.0 390 */ 391 virtual void adoptText(CharacterIterator* newText) override; 392 393 /** 394 * Set the iterator to analyze a new piece of text. This function resets 395 * the current iteration position to the beginning of the text. 396 * 397 * The BreakIterator will retain a reference to the supplied string. 398 * The caller must not modify or delete the text while the BreakIterator 399 * retains the reference. 400 * 401 * @param newText The text to analyze. 402 * @stable ICU 2.0 403 */ 404 virtual void setText(const UnicodeString& newText) override; 405 406 /** 407 * Reset the break iterator to operate over the text represented by 408 * the UText. The iterator position is reset to the start. 409 * 410 * This function makes a shallow clone of the supplied UText. This means 411 * that the caller is free to immediately close or otherwise reuse the 412 * Utext that was passed as a parameter, but that the underlying text itself 413 * must not be altered while being referenced by the break iterator. 414 * 415 * @param text The UText used to change the text. 416 * @param status Receives any error codes. 417 * @stable ICU 3.4 418 */ 419 virtual void setText(UText *text, UErrorCode &status) override; 420 421 /** 422 * Sets the current iteration position to the beginning of the text, position zero. 423 * @return The offset of the beginning of the text, zero. 424 * @stable ICU 2.0 425 */ 426 virtual int32_t first(void) override; 427 428 /** 429 * Sets the current iteration position to the end of the text. 430 * @return The text's past-the-end offset. 431 * @stable ICU 2.0 432 */ 433 virtual int32_t last(void) override; 434 435 /** 436 * Advances the iterator either forward or backward the specified number of steps. 437 * Negative values move backward, and positive values move forward. This is 438 * equivalent to repeatedly calling next() or previous(). 439 * @param n The number of steps to move. The sign indicates the direction 440 * (negative is backwards, and positive is forwards). 441 * @return The character offset of the boundary position n boundaries away from 442 * the current one. 443 * @stable ICU 2.0 444 */ 445 virtual int32_t next(int32_t n) override; 446 447 /** 448 * Advances the iterator to the next boundary position. 449 * @return The position of the first boundary after this one. 450 * @stable ICU 2.0 451 */ 452 virtual int32_t next(void) override; 453 454 /** 455 * Moves the iterator backwards, to the last boundary preceding this one. 456 * @return The position of the last boundary position preceding this one. 457 * @stable ICU 2.0 458 */ 459 virtual int32_t previous(void) override; 460 461 /** 462 * Sets the iterator to refer to the first boundary position following 463 * the specified position. 464 * @param offset The position from which to begin searching for a break position. 465 * @return The position of the first break after the current position. 466 * @stable ICU 2.0 467 */ 468 virtual int32_t following(int32_t offset) override; 469 470 /** 471 * Sets the iterator to refer to the last boundary position before the 472 * specified position. 473 * @param offset The position to begin searching for a break from. 474 * @return The position of the last boundary before the starting position. 475 * @stable ICU 2.0 476 */ 477 virtual int32_t preceding(int32_t offset) override; 478 479 /** 480 * Returns true if the specified position is a boundary position. As a side 481 * effect, leaves the iterator pointing to the first boundary position at 482 * or after "offset". 483 * @param offset the offset to check. 484 * @return True if "offset" is a boundary position. 485 * @stable ICU 2.0 486 */ 487 virtual UBool isBoundary(int32_t offset) override; 488 489 /** 490 * Returns the current iteration position. Note that UBRK_DONE is never 491 * returned from this function; if iteration has run to the end of a 492 * string, current() will return the length of the string while 493 * next() will return UBRK_DONE). 494 * @return The current iteration position. 495 * @stable ICU 2.0 496 */ 497 virtual int32_t current(void) const override; 498 499 500 /** 501 * Return the status tag from the break rule that determined the boundary at 502 * the current iteration position. For break rules that do not specify a 503 * status, a default value of 0 is returned. If more than one break rule 504 * would cause a boundary to be located at some position in the text, 505 * the numerically largest of the applicable status values is returned. 506 * <p> 507 * Of the standard types of ICU break iterators, only word break and 508 * line break provide status values. The values are defined in 509 * the header file ubrk.h. For Word breaks, the status allows distinguishing between words 510 * that contain alphabetic letters, "words" that appear to be numbers, 511 * punctuation and spaces, words containing ideographic characters, and 512 * more. For Line Break, the status distinguishes between hard (mandatory) breaks 513 * and soft (potential) break positions. 514 * <p> 515 * <code>getRuleStatus()</code> can be called after obtaining a boundary 516 * position from <code>next()</code>, <code>previous()</code>, or 517 * any other break iterator functions that returns a boundary position. 518 * <p> 519 * Note that <code>getRuleStatus()</code> returns the value corresponding to 520 * <code>current()</code> index even after <code>next()</code> has returned DONE. 521 * <p> 522 * When creating custom break rules, one is free to define whatever 523 * status values may be convenient for the application. 524 * <p> 525 * @return the status from the break rule that determined the boundary 526 * at the current iteration position. 527 * 528 * @see UWordBreak 529 * @stable ICU 2.2 530 */ 531 virtual int32_t getRuleStatus() const override; 532 533 /** 534 * Get the status (tag) values from the break rule(s) that determined the boundary 535 * at the current iteration position. 536 * <p> 537 * The returned status value(s) are stored into an array provided by the caller. 538 * The values are stored in sorted (ascending) order. 539 * If the capacity of the output array is insufficient to hold the data, 540 * the output will be truncated to the available length, and a 541 * U_BUFFER_OVERFLOW_ERROR will be signaled. 542 * 543 * @param fillInVec an array to be filled in with the status values. 544 * @param capacity the length of the supplied vector. A length of zero causes 545 * the function to return the number of status values, in the 546 * normal way, without attempting to store any values. 547 * @param status receives error codes. 548 * @return The number of rule status values from the rules that determined 549 * the boundary at the current iteration position. 550 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value 551 * is the total number of status values that were available, 552 * not the reduced number that were actually returned. 553 * @see getRuleStatus 554 * @stable ICU 3.0 555 */ 556 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override; 557 558 /** 559 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. 560 * This method is to implement a simple version of RTTI, since not all 561 * C++ compilers support genuine RTTI. Polymorphic operator==() and 562 * clone() methods call this method. 563 * 564 * @return The class ID for this object. All objects of a 565 * given class have the same class ID. Objects of 566 * other classes have different class IDs. 567 * @stable ICU 2.0 568 */ 569 virtual UClassID getDynamicClassID(void) const override; 570 571 /** 572 * Returns the class ID for this class. This is useful only for 573 * comparing to a return value from getDynamicClassID(). For example: 574 * 575 * Base* polymorphic_pointer = createPolymorphicObject(); 576 * if (polymorphic_pointer->getDynamicClassID() == 577 * Derived::getStaticClassID()) ... 578 * 579 * @return The class ID for all objects of this class. 580 * @stable ICU 2.0 581 */ 582 static UClassID U_EXPORT2 getStaticClassID(void); 583 584 #ifndef U_FORCE_HIDE_DEPRECATED_API 585 /** 586 * Deprecated functionality. Use clone() instead. 587 * 588 * Create a clone (copy) of this break iterator in memory provided 589 * by the caller. The idea is to increase performance by avoiding 590 * a storage allocation. Use of this function is NOT RECOMMENDED. 591 * Performance gains are minimal, and correct buffer management is 592 * tricky. Use clone() instead. 593 * 594 * @param stackBuffer The pointer to the memory into which the cloned object 595 * should be placed. If nullptr, allocate heap memory 596 * for the cloned object. 597 * @param BufferSize The size of the buffer. If zero, return the required 598 * buffer size, but do not clone the object. If the 599 * size was too small (but not zero), allocate heap 600 * storage for the cloned object. 601 * 602 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 603 * returned if the provided buffer was too small, and 604 * the clone was therefore put on the heap. 605 * 606 * @return Pointer to the clone object. This may differ from the stackBuffer 607 * address if the byte alignment of the stack buffer was not suitable 608 * or if the stackBuffer was too small to hold the clone. 609 * @deprecated ICU 52. Use clone() instead. 610 */ 611 virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer, 612 int32_t &BufferSize, 613 UErrorCode &status) override; 614 #endif // U_FORCE_HIDE_DEPRECATED_API 615 616 /** 617 * Return the binary form of compiled break rules, 618 * which can then be used to create a new break iterator at some 619 * time in the future. Creating a break iterator from pre-compiled rules 620 * is much faster than building one from the source form of the 621 * break rules. 622 * 623 * The binary data can only be used with the same version of ICU 624 * and on the same platform type (processor endian-ness) 625 * 626 * @param length Returns the length of the binary data. (Out parameter.) 627 * 628 * @return A pointer to the binary (compiled) rule data. The storage 629 * belongs to the RulesBasedBreakIterator object, not the 630 * caller, and must not be modified or deleted. 631 * @stable ICU 4.8 632 */ 633 virtual const uint8_t *getBinaryRules(uint32_t &length); 634 635 /** 636 * Set the subject text string upon which the break iterator is operating 637 * without changing any other aspect of the matching state. 638 * The new and previous text strings must have the same content. 639 * 640 * This function is intended for use in environments where ICU is operating on 641 * strings that may move around in memory. It provides a mechanism for notifying 642 * ICU that the string has been relocated, and providing a new UText to access the 643 * string in its new position. 644 * 645 * Note that the break iterator implementation never copies the underlying text 646 * of a string being processed, but always operates directly on the original text 647 * provided by the user. Refreshing simply drops the references to the old text 648 * and replaces them with references to the new. 649 * 650 * Caution: this function is normally used only by very specialized, 651 * system-level code. One example use case is with garbage collection that moves 652 * the text in memory. 653 * 654 * @param input The new (moved) text string. 655 * @param status Receives errors detected by this function. 656 * @return *this 657 * 658 * @stable ICU 49 659 */ 660 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override; 661 662 663 private: 664 //======================================================================= 665 // implementation 666 //======================================================================= 667 /** 668 * Iterate backwards from an arbitrary position in the input text using the 669 * synthesized Safe Reverse rules. 670 * This locates a "Safe Position" from which the forward break rules 671 * will operate correctly. A Safe Position is not necessarily a boundary itself. 672 * 673 * @param fromPosition the position in the input text to begin the iteration. 674 * @internal (private) 675 */ 676 int32_t handleSafePrevious(int32_t fromPosition); 677 678 /** 679 * Find a rule-based boundary by running the state machine. 680 * Input 681 * fPosition, the position in the text to begin from. 682 * Output 683 * fPosition: the boundary following the starting position. 684 * fDictionaryCharCount the number of dictionary characters encountered. 685 * If > 0, the segment will be further subdivided 686 * fRuleStatusIndex Info from the state table indicating which rules caused the boundary. 687 * 688 * @internal (private) 689 */ 690 int32_t handleNext(); 691 692 /* 693 * Templatized version of handleNext() and handleSafePrevious(). 694 * 695 * There will be exactly four instantiations, two each for 8 and 16 bit tables, 696 * two each for 8 and 16 bit trie. 697 * Having separate instantiations for the table types keeps conditional tests of 698 * the table type out of the inner loops, at the expense of replicated code. 699 * 700 * The template parameter for the Trie access function is a value, not a type. 701 * Doing it this way, the compiler will inline the Trie function in the 702 * expanded functions. (Both the 8 and 16 bit access functions have the same type 703 * signature) 704 */ 705 706 typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32); 707 708 template<typename RowType, PTrieFunc trieFunc> 709 int32_t handleSafePrevious(int32_t fromPosition); 710 711 template<typename RowType, PTrieFunc trieFunc> 712 int32_t handleNext(); 713 714 715 /** 716 * This function returns the appropriate LanguageBreakEngine for a 717 * given character c. 718 * @param c A character in the dictionary set 719 * @internal (private) 720 */ 721 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 722 723 public: 724 #ifndef U_HIDE_INTERNAL_API 725 /** 726 * Debugging function only. 727 * @internal 728 */ 729 void dumpCache(); 730 731 /** 732 * Debugging function only. 733 * @internal 734 */ 735 void dumpTables(); 736 #endif /* U_HIDE_INTERNAL_API */ 737 }; 738 739 U_NAMESPACE_END 740 741 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 742 743 #endif /* U_SHOW_CPLUSPLUS_API */ 744 745 #endif 746