/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
package ohos.global.icu.text;

import java.io.IOException;

import ohos.global.icu.util.ICUUncheckedIOException;

/**
 * Normalization filtered by a UnicodeSet.
 * Normalizes portions of the text contained in the filter set and leaves
 * portions not contained in the filter set unchanged.
 * Filtering is done via UnicodeSet.span(..., UnicodeSet.SpanCondition.SIMPLE).
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
 * This class implements all of (and only) the Normalizer2 API.
 * An instance of this class is unmodifiable/immutable.
 * @author Markus W. Scherer
 * @hide exposed on OHOS
 */
public class FilteredNormalizer2 extends Normalizer2 {
    /**
     * Constructs a filtered normalizer wrapping any Normalizer2 instance
     * and a filter set.
     * Both are aliased and must not be modified or deleted while this object
     * is used.
     * The filter set should be frozen; otherwise the performance will suffer greatly.
     * @param n2 wrapped Normalizer2 instance
     * @param filterSet UnicodeSet which determines the characters to be normalized
     */
    public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) {
        norm2=n2;
        set=filterSet;
    }

    /**
     * {@inheritDoc}
     * <p>The destination is emptied first; in-filter spans of {@code src} are then
     * normalized by the wrapped instance and out-of-filter spans are copied verbatim.
     * @throws IllegalArgumentException if {@code dest} and {@code src} are the same object
     */
    @Override
    public StringBuilder normalize(CharSequence src, StringBuilder dest) {
        if(dest==src) {
            throw new IllegalArgumentException();
        }
        dest.setLength(0);
        normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
        return dest;
    }

    /**
     * {@inheritDoc}
     * <p>Output is appended to {@code dest}; in-filter spans of {@code src} are
     * normalized by the wrapped instance and out-of-filter spans are copied verbatim.
     * @throws IllegalArgumentException if {@code dest} and {@code src} are the same object
     * @throws ICUUncheckedIOException if appending to {@code dest} throws an IOException
     */
    @Override
    public Appendable normalize(CharSequence src, Appendable dest) {
        if(dest==src) {
            throw new IllegalArgumentException();
        }
        return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
    }

    /**
     * {@inheritDoc}
     * @throws IllegalArgumentException if {@code first} and {@code second} are the same object
     */
    @Override
    public StringBuilder normalizeSecondAndAppend(
            StringBuilder first, CharSequence second) {
        return normalizeSecondAndAppend(first, second, true);
    }

    /**
     * {@inheritDoc}
     * @throws IllegalArgumentException if {@code first} and {@code second} are the same object
     */
    @Override
    public StringBuilder append(StringBuilder first, CharSequence second) {
        return normalizeSecondAndAppend(first, second, false);
    }

    /**
     * {@inheritDoc}
     * <p>Returns {@code null} for any code point that is not in the filter set.
     */
    @Override
    public String getDecomposition(int c) {
        return set.contains(c) ? norm2.getDecomposition(c) : null;
    }

    /**
     * {@inheritDoc}
     * <p>Returns {@code null} for any code point that is not in the filter set.
     */
    @Override
    public String getRawDecomposition(int c) {
        return set.contains(c) ? norm2.getRawDecomposition(c) : null;
    }

    /**
     * {@inheritDoc}
     * <p>Returns -1 unless both {@code a} and {@code b} are in the filter set.
     */
    @Override
    public int composePair(int a, int b) {
        return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : -1;
    }

    /**
     * {@inheritDoc}
     * <p>Returns 0 for any code point that is not in the filter set.
     */
    @Override
    public int getCombiningClass(int c) {
        return set.contains(c) ? norm2.getCombiningClass(c) : 0;
    }

    /**
     * {@inheritDoc}
     * <p>Only in-filter spans are checked with the wrapped instance;
     * out-of-filter spans are skipped, i.e. treated as normalized.
     */
    @Override
    public boolean isNormalized(CharSequence s) {
        // Alternate between in-filter (SIMPLE) and out-of-filter (NOT_CONTAINED) spans,
        // starting with an in-filter span.
        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
            } else {
                if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) {
                    return false;
                }
                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
            }
            prevSpanLimit=spanLimit;
        }
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>Only in-filter spans are checked with the wrapped instance. The result is
     * NO as soon as any in-filter span yields NO, otherwise MAYBE if any in-filter
     * span yields MAYBE, otherwise YES.
     */
    @Override
    public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
        Normalizer.QuickCheckResult result=Normalizer.YES;
        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
            } else {
                Normalizer.QuickCheckResult qcResult=
                    norm2.quickCheck(s.subSequence(prevSpanLimit, spanLimit));
                if(qcResult==Normalizer.NO) {
                    return qcResult;
                } else if(qcResult==Normalizer.MAYBE) {
                    // Remember MAYBE but keep scanning: a later span may still yield NO.
                    result=qcResult;
                }
                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
            }
            prevSpanLimit=spanLimit;
        }
        return result;
    }

    /**
     * {@inheritDoc}
     * <p>Out-of-filter spans count as "quick check yes". The scan stops inside the
     * first in-filter span for which the wrapped instance's own span ends early.
     */
    @Override
    public int spanQuickCheckYes(CharSequence s) {
        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
            } else {
                // The wrapped result is relative to the subsequence; rebase it onto s.
                int yesLimit=
                    prevSpanLimit+
                    norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit));
                if(yesLimit<spanLimit) {
                    return yesLimit;
                }
                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
            }
            prevSpanLimit=spanLimit;
        }
        return s.length();
    }

    /**
     * {@inheritDoc}
     * <p>Always true for a code point that is not in the filter set.
     */
    @Override
    public boolean hasBoundaryBefore(int c) {
        return !set.contains(c) || norm2.hasBoundaryBefore(c);
    }

    /**
     * {@inheritDoc}
     * <p>Always true for a code point that is not in the filter set.
     */
    @Override
    public boolean hasBoundaryAfter(int c) {
        return !set.contains(c) || norm2.hasBoundaryAfter(c);
    }

    /**
     * {@inheritDoc}
     * <p>Always true for a code point that is not in the filter set.
     */
    @Override
    public boolean isInert(int c) {
        return !set.contains(c) || norm2.isInert(c);
    }

    // Internal: No argument checking, and appends to dest.
    // Pass as input spanCondition the one that is likely to yield a non-zero
    // span length at the start of src.
    // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
    // UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src
    // and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after
    // an in-filter prefix.
    // An IOException thrown by dest is rethrown wrapped in an ICUUncheckedIOException.
    private Appendable normalize(CharSequence src, Appendable dest,
                                 UnicodeSet.SpanCondition spanCondition) {
        // Don't throw away destination buffer between iterations.
        StringBuilder tempDest=new StringBuilder();
        try {
            for(int prevSpanLimit=0; prevSpanLimit<src.length();) {
                int spanLimit=set.span(src, prevSpanLimit, spanCondition);
                int spanLength=spanLimit-prevSpanLimit;
                if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
                    // Out-of-filter span: copy through unchanged.
                    if(spanLength!=0) {
                        dest.append(src, prevSpanLimit, spanLimit);
                    }
                    spanCondition=UnicodeSet.SpanCondition.SIMPLE;
                } else {
                    if(spanLength!=0) {
                        // Not norm2.normalizeSecondAndAppend() because we do not want
                        // to modify the non-filter part of dest.
                        dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest));
                    }
                    spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
                }
                prevSpanLimit=spanLimit;
            }
        } catch(IOException e) {
            throw new ICUUncheckedIOException(e);
        }
        return dest;
    }

    // Internal: shared implementation of normalizeSecondAndAppend() (doNormalize=true)
    // and append() (doNormalize=false).
    // Throws IllegalArgumentException if first and second are the same object.
    private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second,
                                                   boolean doNormalize) {
        if(first==second) {
            throw new IllegalArgumentException();
        }
        if(first.length()==0) {
            // Nothing to merge with: plain normalize or plain append.
            if(doNormalize) {
                return normalize(second, first);
            } else {
                return first.append(second);
            }
        }
        // merge the in-filter suffix of the first string with the in-filter prefix of the second
        int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE);
        if(prefixLimit!=0) {
            CharSequence prefix=second.subSequence(0, prefixLimit);
            // Integer.MAX_VALUE: start the backwards span from the end of first.
            int suffixStart=set.spanBack(first, Integer.MAX_VALUE, UnicodeSet.SpanCondition.SIMPLE);
            if(suffixStart==0) {
                // All of first is in the filter: let the wrapped instance work on it directly.
                if(doNormalize) {
                    norm2.normalizeSecondAndAppend(first, prefix);
                } else {
                    norm2.append(first, prefix);
                }
            } else {
                // Merge in a temporary buffer so that the out-of-filter part of first
                // is not passed to the wrapped instance.
                StringBuilder middle=new StringBuilder(
                        first.subSequence(suffixStart, first.length()));
                if(doNormalize) {
                    norm2.normalizeSecondAndAppend(middle, prefix);
                } else {
                    norm2.append(middle, prefix);
                }
                first.delete(suffixStart, Integer.MAX_VALUE).append(middle);
            }
        }
        if(prefixLimit<second.length()) {
            CharSequence rest=second.subSequence(prefixLimit, second.length());
            if(doNormalize) {
                // rest begins right after the in-filter prefix, so start with NOT_CONTAINED.
                normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED);
            } else {
                first.append(rest);
            }
        }
        return first;
    }

    // Final: instances are documented as unmodifiable/immutable.
    private final Normalizer2 norm2;
    private final UnicodeSet set;
}