/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
10 package ohos.global.icu.text;
11 
12 import java.io.IOException;
13 
14 import ohos.global.icu.util.ICUUncheckedIOException;
15 
16 /**
17  * Normalization filtered by a UnicodeSet.
18  * Normalizes portions of the text contained in the filter set and leaves
19  * portions not contained in the filter set unchanged.
20  * Filtering is done via UnicodeSet.span(..., UnicodeSet.SpanCondition.SIMPLE).
21  * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
22  * This class implements all of (and only) the Normalizer2 API.
23  * An instance of this class is unmodifiable/immutable.
24  * @author Markus W. Scherer
25  * @hide exposed on OHOS
26  */
27 public class FilteredNormalizer2 extends Normalizer2 {
28     /**
29      * Constructs a filtered normalizer wrapping any Normalizer2 instance
30      * and a filter set.
31      * Both are aliased and must not be modified or deleted while this object
32      * is used.
33      * The filter set should be frozen; otherwise the performance will suffer greatly.
34      * @param n2 wrapped Normalizer2 instance
35      * @param filterSet UnicodeSet which determines the characters to be normalized
36      */
FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet)37     public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) {
38         norm2=n2;
39         set=filterSet;
40     }
41 
42     /**
43      * {@inheritDoc}
44      */
45     @Override
normalize(CharSequence src, StringBuilder dest)46     public StringBuilder normalize(CharSequence src, StringBuilder dest) {
47         if(dest==src) {
48             throw new IllegalArgumentException();
49         }
50         dest.setLength(0);
51         normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
52         return dest;
53     }
54     /**
55      * {@inheritDoc}
56      */
57     @Override
normalize(CharSequence src, Appendable dest)58     public Appendable normalize(CharSequence src, Appendable dest) {
59         if(dest==src) {
60             throw new IllegalArgumentException();
61         }
62         return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
63     }
64 
65     /**
66      * {@inheritDoc}
67      */
68     @Override
normalizeSecondAndAppend( StringBuilder first, CharSequence second)69     public StringBuilder normalizeSecondAndAppend(
70             StringBuilder first, CharSequence second) {
71         return normalizeSecondAndAppend(first, second, true);
72     }
73     /**
74      * {@inheritDoc}
75      */
76     @Override
append(StringBuilder first, CharSequence second)77     public StringBuilder append(StringBuilder first, CharSequence second) {
78         return normalizeSecondAndAppend(first, second, false);
79     }
80 
81     /**
82      * {@inheritDoc}
83      */
84     @Override
getDecomposition(int c)85     public String getDecomposition(int c) {
86         return set.contains(c) ? norm2.getDecomposition(c) : null;
87     }
88 
89     /**
90      * {@inheritDoc}
91      */
92     @Override
getRawDecomposition(int c)93     public String getRawDecomposition(int c) {
94         return set.contains(c) ? norm2.getRawDecomposition(c) : null;
95     }
96 
97     /**
98      * {@inheritDoc}
99      */
100     @Override
composePair(int a, int b)101     public int composePair(int a, int b) {
102         return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : -1;
103     }
104 
105     /**
106      * {@inheritDoc}
107      */
108     @Override
getCombiningClass(int c)109     public int getCombiningClass(int c) {
110         return set.contains(c) ? norm2.getCombiningClass(c) : 0;
111     }
112 
113     /**
114      * {@inheritDoc}
115      */
116     @Override
isNormalized(CharSequence s)117     public boolean isNormalized(CharSequence s) {
118         UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
119         for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
120             int spanLimit=set.span(s, prevSpanLimit, spanCondition);
121             if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
122                 spanCondition=UnicodeSet.SpanCondition.SIMPLE;
123             } else {
124                 if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) {
125                     return false;
126                 }
127                 spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
128             }
129             prevSpanLimit=spanLimit;
130         }
131         return true;
132     }
133 
134     /**
135      * {@inheritDoc}
136      */
137     @Override
quickCheck(CharSequence s)138     public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
139         Normalizer.QuickCheckResult result=Normalizer.YES;
140         UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
141         for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
142             int spanLimit=set.span(s, prevSpanLimit, spanCondition);
143             if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
144                 spanCondition=UnicodeSet.SpanCondition.SIMPLE;
145             } else {
146                 Normalizer.QuickCheckResult qcResult=
147                     norm2.quickCheck(s.subSequence(prevSpanLimit, spanLimit));
148                 if(qcResult==Normalizer.NO) {
149                     return qcResult;
150                 } else if(qcResult==Normalizer.MAYBE) {
151                     result=qcResult;
152                 }
153                 spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
154             }
155             prevSpanLimit=spanLimit;
156         }
157         return result;
158     }
159     /**
160      * {@inheritDoc}
161      */
162     @Override
spanQuickCheckYes(CharSequence s)163     public int spanQuickCheckYes(CharSequence s) {
164         UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
165         for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
166             int spanLimit=set.span(s, prevSpanLimit, spanCondition);
167             if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
168                 spanCondition=UnicodeSet.SpanCondition.SIMPLE;
169             } else {
170                 int yesLimit=
171                     prevSpanLimit+
172                     norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit));
173                 if(yesLimit<spanLimit) {
174                     return yesLimit;
175                 }
176                 spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
177             }
178             prevSpanLimit=spanLimit;
179         }
180         return s.length();
181     }
182 
183     /**
184      * {@inheritDoc}
185      */
186     @Override
hasBoundaryBefore(int c)187     public boolean hasBoundaryBefore(int c) {
188         return !set.contains(c) || norm2.hasBoundaryBefore(c);
189     }
190 
191     /**
192      * {@inheritDoc}
193      */
194     @Override
hasBoundaryAfter(int c)195     public boolean hasBoundaryAfter(int c) {
196         return !set.contains(c) || norm2.hasBoundaryAfter(c);
197     }
198 
199     /**
200      * {@inheritDoc}
201      */
202     @Override
isInert(int c)203     public boolean isInert(int c) {
204         return !set.contains(c) || norm2.isInert(c);
205     }
206 
207     // Internal: No argument checking, and appends to dest.
208     // Pass as input spanCondition the one that is likely to yield a non-zero
209     // span length at the start of src.
210     // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
211     // UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src
212     // and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after
213     // an in-filter prefix.
normalize(CharSequence src, Appendable dest, UnicodeSet.SpanCondition spanCondition)214     private Appendable normalize(CharSequence src, Appendable dest,
215                                  UnicodeSet.SpanCondition spanCondition) {
216         // Don't throw away destination buffer between iterations.
217         StringBuilder tempDest=new StringBuilder();
218         try {
219             for(int prevSpanLimit=0; prevSpanLimit<src.length();) {
220                 int spanLimit=set.span(src, prevSpanLimit, spanCondition);
221                 int spanLength=spanLimit-prevSpanLimit;
222                 if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
223                     if(spanLength!=0) {
224                         dest.append(src, prevSpanLimit, spanLimit);
225                     }
226                     spanCondition=UnicodeSet.SpanCondition.SIMPLE;
227                 } else {
228                     if(spanLength!=0) {
229                         // Not norm2.normalizeSecondAndAppend() because we do not want
230                         // to modify the non-filter part of dest.
231                         dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest));
232                     }
233                     spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
234                 }
235                 prevSpanLimit=spanLimit;
236             }
237         } catch(IOException e) {
238             throw new ICUUncheckedIOException(e);
239         }
240         return dest;
241     }
242 
normalizeSecondAndAppend(StringBuilder first, CharSequence second, boolean doNormalize)243     private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second,
244                                                    boolean doNormalize) {
245         if(first==second) {
246             throw new IllegalArgumentException();
247         }
248         if(first.length()==0) {
249             if(doNormalize) {
250                 return normalize(second, first);
251             } else {
252                 return first.append(second);
253             }
254         }
255         // merge the in-filter suffix of the first string with the in-filter prefix of the second
256         int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE);
257         if(prefixLimit!=0) {
258             CharSequence prefix=second.subSequence(0, prefixLimit);
259             int suffixStart=set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE);
260             if(suffixStart==0) {
261                 if(doNormalize) {
262                     norm2.normalizeSecondAndAppend(first, prefix);
263                 } else {
264                     norm2.append(first, prefix);
265                 }
266             } else {
267                 StringBuilder middle=new StringBuilder(
268                         first.subSequence(suffixStart, first.length()));
269                 if(doNormalize) {
270                     norm2.normalizeSecondAndAppend(middle, prefix);
271                 } else {
272                     norm2.append(middle, prefix);
273                 }
274                 first.delete(suffixStart, 0x7fffffff).append(middle);
275             }
276         }
277         if(prefixLimit<second.length()) {
278             CharSequence rest=second.subSequence(prefixLimit, second.length());
279             if(doNormalize) {
280                 normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED);
281             } else {
282                 first.append(rest);
283             }
284         }
285         return first;
286     }
287 
288     private Normalizer2 norm2;
289     private UnicodeSet set;
290 };
291