1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1999-2010, International Business Machines Corporation and others. 6 * All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 11/17/99 aliu Creation. 10 ********************************************************************** 11 */ 12 #ifndef UNIFILT_H 13 #define UNIFILT_H 14 15 #include "unicode/unifunct.h" 16 #include "unicode/unimatch.h" 17 18 /** 19 * \file 20 * \brief C++ API: Unicode Filter 21 */ 22 23 U_NAMESPACE_BEGIN 24 25 /** 26 * U_ETHER is used to represent character values for positions outside 27 * a range. For example, transliterator uses this to represent 28 * characters outside the range contextStart..contextLimit-1. This 29 * allows explicit matching by rules and UnicodeSets of text outside a 30 * defined range. 31 * @stable ICU 3.0 32 */ 33 #define U_ETHER ((char16_t)0xFFFF) 34 35 /** 36 * 37 * <code>UnicodeFilter</code> defines a protocol for selecting a 38 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. 39 * Currently, filters are used in conjunction with classes like {@link 40 * Transliterator} to only process selected characters through a 41 * transformation. 42 * 43 * <p>Note: UnicodeFilter currently stubs out two pure virtual methods 44 * of its base class, UnicodeMatcher. These methods are toPattern() 45 * and matchesIndexValue(). This is done so that filter classes that 46 * are not actually used as matchers -- specifically, those in the 47 * UnicodeFilterLogic component, and those in tests -- can continue to 48 * work without defining these methods. As long as a filter is not 49 * used in an RBT during real transliteration, these methods will not 50 * be called. However, this breaks the UnicodeMatcher base class 51 * protocol, and it is not a correct solution. 52 * 53 * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter 54 * hierarchy and either redesign it, or simply remove the stubs in 55 * UnicodeFilter and force subclasses to implement the full 56 * UnicodeMatcher protocol. 57 * 58 * @see UnicodeFilterLogic 59 * @stable ICU 2.0 60 */ 61 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { 62 63 public: 64 /** 65 * Destructor 66 * @stable ICU 2.0 67 */ 68 virtual ~UnicodeFilter(); 69 70 /** 71 * Returns <tt>true</tt> for characters that are in the selected 72 * subset. In other words, if a character is <b>to be 73 * filtered</b>, then <tt>contains()</tt> returns 74 * <b><tt>false</tt></b>. 75 * @stable ICU 2.0 76 */ 77 virtual UBool contains(UChar32 c) const = 0; 78 79 /** 80 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 81 * and return the pointer. 82 * @stable ICU 2.4 83 */ 84 virtual UnicodeMatcher* toMatcher() const; 85 86 /** 87 * Implement UnicodeMatcher API. 88 * @stable ICU 2.4 89 */ 90 virtual UMatchDegree matches(const Replaceable& text, 91 int32_t& offset, 92 int32_t limit, 93 UBool incremental); 94 95 /** 96 * UnicodeFunctor API. Nothing to do. 97 * @stable ICU 2.4 98 */ 99 virtual void setData(const TransliterationRuleData*); 100 101 /** 102 * ICU "poor man's RTTI", returns a UClassID for this class. 103 * 104 * @stable ICU 2.2 105 */ 106 static UClassID U_EXPORT2 getStaticClassID(); 107 108 protected: 109 110 /* 111 * Since this class has pure virtual functions, 112 * a constructor can't be used. 113 * @stable ICU 2.0 114 */ 115 /* UnicodeFilter();*/ 116 }; 117 118 /*inline UnicodeFilter::UnicodeFilter() {}*/ 119 120 U_NAMESPACE_END 121 122 #endif 123