• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 *   Copyright (C) 1999-2016 International Business Machines Corporation   *
6 *   and others. All rights reserved.                                      *
7 ***************************************************************************
8 
9 **********************************************************************
10 *   Date        Name        Description
11 *   10/22/99    alan        Creation.
12 *   11/11/99    rgillam     Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
21 #if U_SHOW_CPLUSPLUS_API
22 
23 /**
24  * \file
25  * \brief C++ API: Rule Based Break Iterator
26  */
27 
28 #if !UCONFIG_NO_BREAK_ITERATION
29 
30 #include "unicode/brkiter.h"
31 #include "unicode/udata.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/schriter.h"
34 
35 struct UCPTrie;
36 
37 U_NAMESPACE_BEGIN
38 
39 /** @internal */
40 class  LanguageBreakEngine;
41 struct RBBIDataHeader;
42 class  RBBIDataWrapper;
43 class  UnhandledEngine;
44 class  UStack;
45 
46 /**
47  *
48  * A subclass of BreakIterator whose behavior is specified using a list of rules.
49  * <p>Instances of this class are most commonly created by the factory methods of
50  *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
51  *  and then used via the abstract API in class BreakIterator</p>
52  *
53  * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
54  *
55  * <p>This class is not intended to be subclassed.</p>
56  */
57 class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
58 
59 private:
60     /**
61      * The UText through which this BreakIterator accesses the text
62      * @internal (private)
63      */
64     UText  fText = UTEXT_INITIALIZER;
65 
66 #ifndef U_HIDE_INTERNAL_API
67 public:
68 #endif /* U_HIDE_INTERNAL_API */
69     /**
70      * The rule data for this BreakIterator instance.
71      * Not for general use; Public only for testing purposes.
72      * @internal
73      */
74     RBBIDataWrapper    *fData = nullptr;
75 
76 private:
77     /**
78       * The saved error code associated with this break iterator.
79       * This is the value to be returned by copyErrorTo().
80       */
81     UErrorCode      fErrorCode = U_ZERO_ERROR;
82 
83     /**
84       * The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
85       * Never has the value UBRK_DONE (-1).
86       */
87     int32_t         fPosition = 0;
88 
89     /**
90       * Set by handleNext(): info from the state table indicating which rules caused the boundary.
91       */
92     int32_t         fRuleStatusIndex = 0;
93 
94     /**
95      *   Cache of previously determined boundary positions.
96      */
97     class BreakCache;
98     BreakCache         *fBreakCache = nullptr;
99 
100     /**
101      *  Cache of boundary positions within a region of text that has been
102      *  sub-divided by dictionary based breaking.
103      */
104     class DictionaryCache;
105     DictionaryCache *fDictionaryCache = nullptr;
106 
107     /**
108      *
109      * If present, UStack of LanguageBreakEngine objects that might handle
110      * dictionary characters. Searched from top to bottom to find an object to
111      * handle a given character.
112      * @internal (private)
113      */
114     UStack              *fLanguageBreakEngines = nullptr;
115 
116     /**
117      *
118      * If present, the special LanguageBreakEngine used for handling
119      * characters that are in the dictionary set, but not handled by any
120      * LanguageBreakEngine.
121      * @internal (private)
122      */
123     UnhandledEngine     *fUnhandledBreakEngine = nullptr;
124 
125     /**
126      * Counter for the number of characters encountered with the "dictionary"
127      *   flag set.
128      * @internal (private)
129      */
130     uint32_t            fDictionaryCharCount = 0;
131 
132     /**
133      *   A character iterator that refers to the same text as the UText, above.
134      *   Only included for compatibility with old API, which was based on CharacterIterators.
135      *   Value may be adopted from outside, or point to fSCharIter, below.
136      */
137     CharacterIterator  *fCharIter = &fSCharIter;
138 
139     /**
140      *   When the input text is provided by a UnicodeString, this is the
141      *    character iterator that wraps that data.  Needed only for the
142      *    implementation of getText(), a backwards compatibility issue.
143      */
144     UCharCharacterIterator fSCharIter {u"", 0};
145 
146     /**
147       * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
148       */
149     bool           fDone = false;
150 
151     /**
152      *  Array of look-ahead tentative results.
153      */
154     int32_t *fLookAheadMatches = nullptr;
155 
156     /**
157      *  A flag to indicate if phrase based breaking is enabled.
158      */
159     UBool fIsPhraseBreaking = false;
160 
161     //=======================================================================
162     // constructors
163     //=======================================================================
164 
165     /**
166      * Constructor from a flattened set of RBBI data in malloced memory.
167      *             RuleBasedBreakIterators built from a custom set of rules
168      *             are created via this constructor; the rules are compiled
169      *             into memory, then the break iterator is constructed here.
170      *
171      *             The break iterator adopts the memory, and will
172      *             free it when done.
173      * @internal (private)
174      */
175     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
176 
177     /**
178      * This constructor uses the udata interface to create a BreakIterator
179      * whose internal tables live in a memory-mapped file.  "image" is an
180      * ICU UDataMemory handle for the pre-compiled break iterator tables.
181      * @param image handle to the memory image for the break iterator data.
182      *        Ownership of the UDataMemory handle passes to the Break Iterator,
183      *        which will be responsible for closing it when it is no longer needed.
184      * @param status Information on any errors encountered.
185      * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
186      * @see udata_open
187      * @see #getBinaryRules
188      * @internal (private)
189      */
190     RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
191 
192     /** @internal */
193     friend class RBBIRuleBuilder;
194     /** @internal */
195     friend class BreakIterator;
196 
197     /**
198      * Default constructor with an error code parameter.
199      * Aside from error handling, otherwise identical to the default constructor.
200      * Internally, handles common initialization for other constructors.
201      * @internal (private)
202      */
203     RuleBasedBreakIterator(UErrorCode *status);
204 
205 public:
206 
207     /** Default constructor.  Creates an empty shell of an iterator, with no
208      *  rules or text to iterate over.   Object can subsequently be assigned to,
209      *  but is otherwise unusable.
210      *  @stable ICU 2.2
211      */
212     RuleBasedBreakIterator();
213 
214     /**
215      * Copy constructor.  Will produce a break iterator with the same behavior,
216      * and which iterates over the same text, as the one passed in.
217      * @param that The RuleBasedBreakIterator passed to be copied
218      * @stable ICU 2.0
219      */
220     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
221 
222     /**
223      * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
224      * @param rules The break rules to be used.
225      * @param parseError  In the event of a syntax error in the rules, provides the location
226      *                    within the rules of the problem.
227      * @param status Information on any errors encountered.
228      * @stable ICU 2.2
229      */
230     RuleBasedBreakIterator( const UnicodeString    &rules,
231                              UParseError           &parseError,
232                              UErrorCode            &status);
233 
234     /**
235      * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
236      * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
237      * Construction of a break iterator in this way is substantially faster than
238      * construction from source rules.
239      *
240      * Ownership of the storage containing the compiled rules remains with the
241      * caller of this function.  The compiled rules must not be  modified or
242      * deleted during the life of the break iterator.
243      *
244      * The compiled rules are not compatible across different major versions of ICU.
245      * The compiled rules are compatible only between machines with the same
246      * byte ordering (little or big endian) and the same base character set family
247      * (ASCII or EBCDIC).
248      *
249      * @see #getBinaryRules
250      * @param compiledRules A pointer to the compiled break rules to be used.
251      * @param ruleLength The length of the compiled break rules, in bytes.  This
252      *   corresponds to the length value produced by getBinaryRules().
253      * @param status Information on any errors encountered, including invalid
254      *   binary rules.
255      * @stable ICU 4.8
256      */
257     RuleBasedBreakIterator(const uint8_t *compiledRules,
258                            uint32_t       ruleLength,
259                            UErrorCode    &status);
260 
261     /**
262      * This constructor uses the udata interface to create a BreakIterator
263      * whose internal tables live in a memory-mapped file.  "image" is an
264      * ICU UDataMemory handle for the pre-compiled break iterator tables.
265      * @param image handle to the memory image for the break iterator data.
266      *        Ownership of the UDataMemory handle passes to the Break Iterator,
267      *        which will be responsible for closing it when it is no longer needed.
268      * @param status Information on any errors encountered.
269      * @see udata_open
270      * @see #getBinaryRules
271      * @stable ICU 2.8
272      */
273     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
274 
275     /**
276      * Destructor
277      *  @stable ICU 2.0
278      */
279     virtual ~RuleBasedBreakIterator();
280 
281     /**
282      * Assignment operator.  Sets this iterator to have the same behavior,
283      * and iterate over the same text, as the one passed in.
284      * @param that The RuleBasedBreakIterator passed in
285      * @return the newly created RuleBasedBreakIterator
286      *  @stable ICU 2.0
287      */
288     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
289 
290     /**
291      * Equality operator.  Returns true if both BreakIterators are of the
292      * same class, have the same behavior, and iterate over the same text.
293      * @param that The BreakIterator to be compared for equality
294      * @return true if both BreakIterators are of the
295      * same class, have the same behavior, and iterate over the same text.
296      *  @stable ICU 2.0
297      */
298     virtual bool operator==(const BreakIterator& that) const override;
299 
300     /**
301      * Not-equal operator.  If operator== returns true, this returns false,
302      * and vice versa.
303      * @param that The BreakIterator to be compared for inequality
304      * @return true if both BreakIterators are not same.
305      *  @stable ICU 2.0
306      */
307     inline bool operator!=(const BreakIterator& that) const {
308         return !operator==(that);
309     }
310 
311     /**
312      * Returns a newly-constructed RuleBasedBreakIterator with the same
313      * behavior, and iterating over the same text, as this one.
314      * Differs from the copy constructor in that it is polymorphic, and
315      * will correctly clone (copy) a derived class.
316      * clone() is thread safe.  Multiple threads may simultaneously
317      * clone the same source break iterator.
318      * @return a newly-constructed RuleBasedBreakIterator
319      * @stable ICU 2.0
320      */
321     virtual RuleBasedBreakIterator* clone() const override;
322 
323     /**
324      * Compute a hash code for this BreakIterator
325      * @return A hash code
326      *  @stable ICU 2.0
327      */
328     virtual int32_t hashCode(void) const;
329 
330     /**
331      * Returns the description used to create this iterator
332      * @return the description used to create this iterator
333      *  @stable ICU 2.0
334      */
335     virtual const UnicodeString& getRules(void) const;
336 
337     //=======================================================================
338     // BreakIterator overrides
339     //=======================================================================
340 
341     /**
342      * <p>
343      * Return a CharacterIterator over the text being analyzed.
344      * The returned character iterator is owned by the break iterator, and must
345      * not be deleted by the caller.  Repeated calls to this function may
346      * return the same CharacterIterator.
347      * </p>
348      * <p>
349      * The returned character iterator must not be used concurrently with
350      * the break iterator.  If concurrent operation is needed, clone the
351      * returned character iterator first and operate on the clone.
352      * </p>
353      * <p>
354      * When the break iterator is operating on text supplied via a UText,
355      * this function will fail, returning a CharacterIterator containing no text.
356      * The function getUText() provides similar functionality,
357      * is reliable, and is more efficient.
358      * </p>
359      *
360      * TODO:  deprecate this function?
361      *
362      * @return An iterator over the text being analyzed.
363      * @stable ICU 2.0
364      */
365     virtual  CharacterIterator& getText(void) const override;
366 
367 
368     /**
369       *  Get a UText for the text being analyzed.
370       *  The returned UText is a shallow clone of the UText used internally
371       *  by the break iterator implementation.  It can safely be used to
372       *  access the text without impacting any break iterator operations,
373       *  but the underlying text itself must not be altered.
374       *
375       * @param fillIn A UText to be filled in.  If nullptr, a new UText will be
376       *           allocated to hold the result.
377       * @param status receives any error codes.
378       * @return   The current UText for this break iterator.  If an input
379       *           UText was provided, it will always be returned.
380       * @stable ICU 3.4
381       */
382      virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
383 
384     /**
385      * Set the iterator to analyze a new piece of text.  This function resets
386      * the current iteration position to the beginning of the text.
387      * @param newText An iterator over the text to analyze.  The BreakIterator
388      * takes ownership of the character iterator.  The caller MUST NOT delete it!
389      *  @stable ICU 2.0
390      */
391     virtual void adoptText(CharacterIterator* newText) override;
392 
393     /**
394      * Set the iterator to analyze a new piece of text.  This function resets
395      * the current iteration position to the beginning of the text.
396      *
397      * The BreakIterator will retain a reference to the supplied string.
398      * The caller must not modify or delete the text while the BreakIterator
399      * retains the reference.
400      *
401      * @param newText The text to analyze.
402      *  @stable ICU 2.0
403      */
404     virtual void setText(const UnicodeString& newText) override;
405 
406     /**
407      * Reset the break iterator to operate over the text represented by
408      * the UText.  The iterator position is reset to the start.
409      *
410      * This function makes a shallow clone of the supplied UText.  This means
411      * that the caller is free to immediately close or otherwise reuse the
412      * UText that was passed as a parameter, but that the underlying text itself
413      * must not be altered while being referenced by the break iterator.
414      *
415      * @param text    The UText used to change the text.
416      * @param status  Receives any error codes.
417      * @stable ICU 3.4
418      */
419     virtual void  setText(UText *text, UErrorCode &status) override;
420 
421     /**
422      * Sets the current iteration position to the beginning of the text, position zero.
423      * @return The offset of the beginning of the text, zero.
424      *  @stable ICU 2.0
425      */
426     virtual int32_t first(void) override;
427 
428     /**
429      * Sets the current iteration position to the end of the text.
430      * @return The text's past-the-end offset.
431      *  @stable ICU 2.0
432      */
433     virtual int32_t last(void) override;
434 
435     /**
436      * Advances the iterator either forward or backward the specified number of steps.
437      * Negative values move backward, and positive values move forward.  This is
438      * equivalent to repeatedly calling next() or previous().
439      * @param n The number of steps to move.  The sign indicates the direction
440      * (negative is backwards, and positive is forwards).
441      * @return The character offset of the boundary position n boundaries away from
442      * the current one.
443      *  @stable ICU 2.0
444      */
445     virtual int32_t next(int32_t n) override;
446 
447     /**
448      * Advances the iterator to the next boundary position.
449      * @return The position of the first boundary after this one.
450      *  @stable ICU 2.0
451      */
452     virtual int32_t next(void) override;
453 
454     /**
455      * Moves the iterator backwards, to the last boundary preceding this one.
456      * @return The position of the last boundary position preceding this one.
457      *  @stable ICU 2.0
458      */
459     virtual int32_t previous(void) override;
460 
461     /**
462      * Sets the iterator to refer to the first boundary position following
463      * the specified position.
464      * @param offset The position from which to begin searching for a break position.
465      * @return The position of the first break after the specified position.
466      *  @stable ICU 2.0
467      */
468     virtual int32_t following(int32_t offset) override;
469 
470     /**
471      * Sets the iterator to refer to the last boundary position before the
472      * specified position.
473      * @param offset The position to begin searching for a break from.
474      * @return The position of the last boundary before the starting position.
475      *  @stable ICU 2.0
476      */
477     virtual int32_t preceding(int32_t offset) override;
478 
479     /**
480      * Returns true if the specified position is a boundary position.  As a side
481      * effect, leaves the iterator pointing to the first boundary position at
482      * or after "offset".
483      * @param offset the offset to check.
484      * @return True if "offset" is a boundary position.
485      *  @stable ICU 2.0
486      */
487     virtual UBool isBoundary(int32_t offset) override;
488 
489     /**
490      * Returns the current iteration position. Note that UBRK_DONE is never
491      * returned from this function; if iteration has run to the end of a
492      * string, current() will return the length of the string while
493      * next() will return UBRK_DONE.
494      * @return The current iteration position.
495      * @stable ICU 2.0
496      */
497     virtual int32_t current(void) const override;
498 
499 
500     /**
501      * Return the status tag from the break rule that determined the boundary at
502      * the current iteration position.  For break rules that do not specify a
503      * status, a default value of 0 is returned.  If more than one break rule
504      * would cause a boundary to be located at some position in the text,
505      * the numerically largest of the applicable status values is returned.
506      * <p>
507      * Of the standard types of ICU break iterators, only word break and
508      * line break provide status values.  The values are defined in
509      * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
510      * that contain alphabetic letters, "words" that appear to be numbers,
511      * punctuation and spaces, words containing ideographic characters, and
512      * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
513      * and soft (potential) break positions.
514      * <p>
515      * <code>getRuleStatus()</code> can be called after obtaining a boundary
516      * position from <code>next()</code>, <code>previous()</code>, or
517      * any other break iterator function that returns a boundary position.
518      * <p>
519      * Note that <code>getRuleStatus()</code> returns the value corresponding to
520      * <code>current()</code> index even after <code>next()</code> has returned DONE.
521      * <p>
522      * When creating custom break rules, one is free to define whatever
523      * status values may be convenient for the application.
524      * <p>
525      * @return the status from the break rule that determined the boundary
526      * at the current iteration position.
527      *
528      * @see UWordBreak
529      * @stable ICU 2.2
530      */
531     virtual int32_t getRuleStatus() const override;
532 
533    /**
534     * Get the status (tag) values from the break rule(s) that determined the boundary
535     * at the current iteration position.
536     * <p>
537     * The returned status value(s) are stored into an array provided by the caller.
538     * The values are stored in sorted (ascending) order.
539     * If the capacity of the output array is insufficient to hold the data,
540     *  the output will be truncated to the available length, and a
541     *  U_BUFFER_OVERFLOW_ERROR will be signaled.
542     *
543     * @param fillInVec an array to be filled in with the status values.
544     * @param capacity  the length of the supplied vector.  A length of zero causes
545     *                  the function to return the number of status values, in the
546     *                  normal way, without attempting to store any values.
547     * @param status    receives error codes.
548     * @return          The number of rule status values from the rules that determined
549     *                  the boundary at the current iteration position.
550     *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
551     *                  is the total number of status values that were available,
552     *                  not the reduced number that were actually returned.
553     * @see getRuleStatus
554     * @stable ICU 3.0
555     */
556     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
557 
558     /**
559      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
560      * This method is to implement a simple version of RTTI, since not all
561      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
562      * clone() methods call this method.
563      *
564      * @return          The class ID for this object. All objects of a
565      *                  given class have the same class ID.  Objects of
566      *                  other classes have different class IDs.
567      * @stable ICU 2.0
568      */
569     virtual UClassID getDynamicClassID(void) const override;
570 
571     /**
572      * Returns the class ID for this class.  This is useful only for
573      * comparing to a return value from getDynamicClassID().  For example:
574      *
575      *      Base* polymorphic_pointer = createPolymorphicObject();
576      *      if (polymorphic_pointer->getDynamicClassID() ==
577      *          Derived::getStaticClassID()) ...
578      *
579      * @return          The class ID for all objects of this class.
580      * @stable ICU 2.0
581      */
582     static UClassID U_EXPORT2 getStaticClassID(void);
583 
584 #ifndef U_FORCE_HIDE_DEPRECATED_API
585     /**
586      * Deprecated functionality. Use clone() instead.
587      *
588      * Create a clone (copy) of this break iterator in memory provided
589      *  by the caller.  The idea is to increase performance by avoiding
590      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
591      *  Performance gains are minimal, and correct buffer management is
592      *  tricky.  Use clone() instead.
593      *
594      * @param stackBuffer  The pointer to the memory into which the cloned object
595      *                     should be placed.  If nullptr,  allocate heap memory
596      *                     for the cloned object.
597      * @param BufferSize   The size of the buffer.  If zero, return the required
598      *                     buffer size, but do not clone the object.  If the
599      *                     size was too small (but not zero), allocate heap
600      *                     storage for the cloned object.
601      *
602      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
603      *                     returned if the provided buffer was too small, and
604      *                     the clone was therefore put on the heap.
605      *
606      * @return  Pointer to the clone object.  This may differ from the stackBuffer
607      *          address if the byte alignment of the stack buffer was not suitable
608      *          or if the stackBuffer was too small to hold the clone.
609      * @deprecated ICU 52. Use clone() instead.
610      */
611     virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
612                                                       int32_t &BufferSize,
613                                                       UErrorCode &status) override;
614 #endif  // U_FORCE_HIDE_DEPRECATED_API
615 
616     /**
617      * Return the binary form of compiled break rules,
618      * which can then be used to create a new break iterator at some
619      * time in the future.  Creating a break iterator from pre-compiled rules
620      * is much faster than building one from the source form of the
621      * break rules.
622      *
623      * The binary data can only be used with the same version of ICU
624      *  and on the same platform type (processor endian-ness)
625      *
626      * @param length Returns the length of the binary data.  (Out parameter.)
627      *
628      * @return   A pointer to the binary (compiled) rule data.  The storage
629      *           belongs to the RuleBasedBreakIterator object, not the
630      *           caller, and must not be modified or deleted.
631      * @stable ICU 4.8
632      */
633     virtual const uint8_t *getBinaryRules(uint32_t &length);
634 
635     /**
636      *  Set the subject text string upon which the break iterator is operating
637      *  without changing any other aspect of the matching state.
638      *  The new and previous text strings must have the same content.
639      *
640      *  This function is intended for use in environments where ICU is operating on
641      *  strings that may move around in memory.  It provides a mechanism for notifying
642      *  ICU that the string has been relocated, and providing a new UText to access the
643      *  string in its new position.
644      *
645      *  Note that the break iterator implementation never copies the underlying text
646      *  of a string being processed, but always operates directly on the original text
647      *  provided by the user. Refreshing simply drops the references to the old text
648      *  and replaces them with references to the new.
649      *
650      *  Caution:  this function is normally used only by very specialized,
651      *  system-level code.  One example use case is with garbage collection that moves
652      *  the text in memory.
653      *
654      * @param input      The new (moved) text string.
655      * @param status     Receives errors detected by this function.
656      * @return           *this
657      *
658      * @stable ICU 49
659      */
660     virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
661 
662 
663 private:
664     //=======================================================================
665     // implementation
666     //=======================================================================
667     /**
668      * Iterate backwards from an arbitrary position in the input text using the
669      * synthesized Safe Reverse rules.
670      * This locates a "Safe Position" from which the forward break rules
671      * will operate correctly. A Safe Position is not necessarily a boundary itself.
672      *
673      * @param fromPosition the position in the input text to begin the iteration.
674      * @internal (private)
675      */
676     int32_t handleSafePrevious(int32_t fromPosition);
677 
678     /**
679      * Find a rule-based boundary by running the state machine.
680      * Input
681      *    fPosition, the position in the text to begin from.
682      * Output
683      *    fPosition:           the boundary following the starting position.
684      *    fDictionaryCharCount the number of dictionary characters encountered.
685      *                         If > 0, the segment will be further subdivided
686      *    fRuleStatusIndex     Info from the state table indicating which rules caused the boundary.
687      *
688      * @internal (private)
689      */
690     int32_t handleNext();
691 
692     /*
693      * Templatized version of handleNext() and handleSafePrevious().
694      *
695      * There will be exactly four instantiations, two each for 8 and 16 bit tables,
696      * two each for 8 and 16 bit trie.
697      * Having separate instantiations for the table types keeps conditional tests of
698      * the table type out of the inner loops, at the expense of replicated code.
699      *
700      * The template parameter for the Trie access function is a value, not a type.
701      * Doing it this way, the compiler will inline the Trie function in the
702      * expanded functions. (Both the 8 and 16 bit access functions have the same type
703      * signature)
704      */
705 
706     typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
707 
708     template<typename RowType, PTrieFunc trieFunc>
709     int32_t handleSafePrevious(int32_t fromPosition);
710 
711     template<typename RowType, PTrieFunc trieFunc>
712     int32_t handleNext();
713 
714 
715     /**
716      * This function returns the appropriate LanguageBreakEngine for a
717      * given character c.
718      * @param c         A character in the dictionary set
719      * @internal (private)
720      */
721     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
722 
723   public:
724 #ifndef U_HIDE_INTERNAL_API
725     /**
726      *   Debugging function only.
727      *   @internal
728      */
729      void dumpCache();
730 
731     /**
732      * Debugging function only.
733      * @internal
734      */
735     void dumpTables();
736 #endif  /* U_HIDE_INTERNAL_API */
737 };
738 
739 U_NAMESPACE_END
740 
741 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
742 
743 #endif /* U_SHOW_CPLUSPLUS_API */
744 
745 #endif
746