1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 07/18/01 aliu Creation. 8 ********************************************************************** 9 */ 10 #ifndef UNIMATCH_H 11 #define UNIMATCH_H 12 13 #include "unicode/utypes.h" 14 15 /** 16 * \file 17 * \brief C++ API: Unicode Matcher 18 */ 19 20 #if U_SHOW_CPLUSPLUS_API 21 22 U_NAMESPACE_BEGIN 23 24 class Replaceable; 25 class UnicodeString; 26 class UnicodeSet; 27 28 /** 29 * Constants returned by <code>UnicodeMatcher::matches()</code> 30 * indicating the degree of match. 31 * @stable ICU 2.4 32 */ 33 enum UMatchDegree { 34 /** 35 * Constant returned by <code>matches()</code> indicating a 36 * mismatch between the text and this matcher. The text contains 37 * a character which does not match, or the text does not contain 38 * all desired characters for a non-incremental match. 39 * @stable ICU 2.4 40 */ 41 U_MISMATCH, 42 43 /** 44 * Constant returned by <code>matches()</code> indicating a 45 * partial match between the text and this matcher. This value is 46 * only returned for incremental match operations. All characters 47 * of the text match, but more characters are required for a 48 * complete match. Alternatively, for variable-length matchers, 49 * all characters of the text match, and if more characters were 50 * supplied at limit, they might also match. 51 * @stable ICU 2.4 52 */ 53 U_PARTIAL_MATCH, 54 55 /** 56 * Constant returned by <code>matches()</code> indicating a 57 * complete match between the text and this matcher. For an 58 * incremental variable-length match, this value is returned if 59 * the given text matches, and it is known that additional 60 * characters would not alter the extent of the match. 61 * @stable ICU 2.4 62 */ 63 U_MATCH 64 }; 65 66 /** 67 * <code>UnicodeMatcher</code> defines a protocol for objects that can 68 * match a range of characters in a Replaceable string. 69 * @stable ICU 2.4 70 */ 71 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { 72 73 public: 74 /** 75 * Destructor. 76 * @stable ICU 2.4 77 */ 78 virtual ~UnicodeMatcher(); 79 80 /** 81 * Return a UMatchDegree value indicating the degree of match for 82 * the given text at the given offset. Zero, one, or more 83 * characters may be matched. 84 * 85 * Matching in the forward direction is indicated by limit > 86 * offset. Characters from offset forwards to limit-1 will be 87 * considered for matching. 88 * 89 * Matching in the reverse direction is indicated by limit < 90 * offset. Characters from offset backwards to limit+1 will be 91 * considered for matching. 92 * 93 * If limit == offset then the only match possible is a zero 94 * character match (which subclasses may implement if desired). 95 * 96 * As a side effect, advance the offset parameter to the limit of 97 * the matched substring. In the forward direction, this will be 98 * the index of the last matched character plus one. In the 99 * reverse direction, this will be the index of the last matched 100 * character minus one. 101 * 102 * <p>Note: This method is not const because some classes may 103 * modify their state as the result of a match. 104 * 105 * @param text the text to be matched 106 * @param offset on input, the index into text at which to begin 107 * matching. On output, the limit of the matched text. The 108 * number of matched characters is the output value of offset 109 * minus the input value. Offset should always point to the 110 * HIGH SURROGATE (leading code unit) of a pair of surrogates, 111 * both on entry and upon return. 112 * @param limit the limit index of text to be matched. Greater 113 * than offset for a forward direction match, less than offset for 114 * a backward direction match. The last character to be 115 * considered for matching will be text.charAt(limit-1) in the 116 * forward direction or text.charAt(limit+1) in the backward 117 * direction. 118 * @param incremental if TRUE, then assume further characters may 119 * be inserted at limit and check for partial matching. Otherwise 120 * assume the text as given is complete. 121 * @return a match degree value indicating a full match, a partial 122 * match, or a mismatch. If incremental is FALSE then 123 * U_PARTIAL_MATCH should never be returned. 124 * @stable ICU 2.4 125 */ 126 virtual UMatchDegree matches(const Replaceable& text, 127 int32_t& offset, 128 int32_t limit, 129 UBool incremental) = 0; 130 131 /** 132 * Returns a string representation of this matcher. If the result of 133 * calling this function is passed to the appropriate parser, it 134 * will produce another matcher that is equal to this one. 135 * @param result the string to receive the pattern. Previous 136 * contents will be deleted. 137 * @param escapeUnprintable if TRUE then convert unprintable 138 * character to their hex escape representations, \\uxxxx or 139 * \\Uxxxxxxxx. Unprintable characters are those other than 140 * U+000A, U+0020..U+007E. 141 * @stable ICU 2.4 142 */ 143 virtual UnicodeString& toPattern(UnicodeString& result, 144 UBool escapeUnprintable = FALSE) const = 0; 145 146 /** 147 * Returns TRUE if this matcher will match a character c, where c 148 * & 0xFF == v, at offset, in the forward direction (with limit > 149 * offset). This is used by <tt>RuleBasedTransliterator</tt> for 150 * indexing. 151 * @stable ICU 2.4 152 */ 153 virtual UBool matchesIndexValue(uint8_t v) const = 0; 154 155 /** 156 * Union the set of all characters that may be matched by this object 157 * into the given set. 158 * @param toUnionTo the set into which to union the source characters 159 * @stable ICU 2.4 160 */ 161 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; 162 }; 163 164 U_NAMESPACE_END 165 166 #endif /* U_SHOW_CPLUSPLUS_API */ 167 168 #endif 169