• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2006, International Business Machines Corporation and others.
4 * All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/17/99    aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef UNIFILT_H
11 #define UNIFILT_H
12 
13 #include "unicode/unifunct.h"
14 #include "unicode/unimatch.h"
15 
16 /**
17  * \file
18  * \brief C++ API: Unicode Filter
19  */
20 
21 U_NAMESPACE_BEGIN
22 
23 /**
24  * U_ETHER is used to represent character values for positions outside
25  * a range.  For example, transliterator uses this to represent
26  * characters outside the range contextStart..contextLimit-1.  This
27  * allows explicit matching by rules and UnicodeSets of text outside a
28  * defined range.  The sentinel value is 0xFFFF, cast to UChar.
29  * @stable ICU 3.0
30  */
31 #define U_ETHER ((UChar)0xFFFF)
32 
33 /**
34  * Abstract base class for Unicode character filters.
35  * <code>UnicodeFilter</code> defines a protocol for selecting a
36  * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
37  * Currently, filters are used in conjunction with classes like {@link
38  * Transliterator} to only process selected characters through a
39  * transformation.
40  *
41  * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
42  * of its base class, UnicodeMatcher.  These methods are toPattern()
43  * and matchesIndexValue().  This is done so that filter classes that
44  * are not actually used as matchers -- specifically, those in the
45  * UnicodeFilterLogic component, and those in tests -- can continue to
46  * work without defining these methods.  As long as a filter is not
47  * used in an RBT during real transliteration, these methods will not
48  * be called.  However, this breaks the UnicodeMatcher base class
49  * protocol, and it is not a correct solution.
50  * NOTE(review): no toPattern() or matchesIndexValue() stub is declared
51  * in this class body; the note above may be stale -- confirm.
52  *
53  * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
54  * hierarchy and either redesign it, or simply remove the stubs and
55  * force subclasses to implement the full UnicodeMatcher protocol.
56  * @see UnicodeFilterLogic
57  * @stable ICU 2.0
58  */
59 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
60 
61 public:
62     /**
63      * Destructor; declared virtual.
64      * @stable ICU 2.0
65      */
66     virtual ~UnicodeFilter();
67 
68     /**
69      * Tests whether a character belongs to the selected subset.  Pure
70      * virtual: every concrete filter must implement it.
71      * @param c the character (code point) to test
72      * @return <tt>true</tt> if c is in the subset, <tt>false</tt> if c is <b>to be filtered</b>
73      * @stable ICU 2.0
74      */
75     virtual UBool contains(UChar32 c) const = 0;
76 
77     /**
78      * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
79      * and return the pointer (non-const, although the method is const).
80      * @stable ICU 2.4
81      */
82     virtual UnicodeMatcher* toMatcher() const;
83 
84     /**
85      * Implement UnicodeMatcher API (see UnicodeMatcher for the contract).
86      * @stable ICU 2.4
87      */
88     virtual UMatchDegree matches(const Replaceable& text,
89                                  int32_t& offset,
90                                  int32_t limit,
91                                  UBool incremental);
92 
93     /**
94      * UnicodeFunctor API.  Nothing to do; the parameter is left unnamed.
95      * @stable ICU 2.4
96      */
97     virtual void setData(const TransliterationRuleData*);
98 
99     /**
100      * ICU "poor man's RTTI", returns a UClassID for the actual class.
101      * Pure virtual: each concrete subclass must provide it.
102      * @stable ICU 2.2
103      */
104     virtual UClassID getDynamicClassID() const = 0;
105 
106     /**
107      * ICU "poor man's RTTI", returns a UClassID for this class.
108      * Static, so it can be called without an instance.
109      * @stable ICU 2.2
110      */
111     static UClassID U_EXPORT2 getStaticClassID();
112 
113 protected:
114 
115     /*
116      * No constructor is declared: the class has pure virtual functions,
117      * so it cannot be instantiated directly.  The declaration below is disabled.
118      * @stable ICU 2.0
119      */
120 /*    UnicodeFilter();*/
121 };
122 
123 /*inline UnicodeFilter::UnicodeFilter() {}*/
124 
125 U_NAMESPACE_END
126 
127 #endif
128