1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * norm2allmodes.h 9 * 10 * created on: 2014sep07 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __NORM2ALLMODES_H__ 15 #define __NORM2ALLMODES_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/edits.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/stringoptions.h" 24 #include "unicode/unistr.h" 25 #include "cpputils.h" 26 #include "normalizer2impl.h" 27 28 U_NAMESPACE_BEGIN 29 30 // Intermediate class: 31 // Has Normalizer2Impl and does boilerplate argument checking and setup. 32 class Normalizer2WithImpl : public Normalizer2 { 33 public: Normalizer2WithImpl(const Normalizer2Impl & ni)34 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 35 virtual ~Normalizer2WithImpl(); 36 37 // normalize 38 virtual UnicodeString & normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)39 normalize(const UnicodeString &src, 40 UnicodeString &dest, 41 UErrorCode &errorCode) const U_OVERRIDE { 42 if(U_FAILURE(errorCode)) { 43 dest.setToBogus(); 44 return dest; 45 } 46 const UChar *sArray=src.getBuffer(); 47 if(&dest==&src || sArray==NULL) { 48 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 49 dest.setToBogus(); 50 return dest; 51 } 52 dest.remove(); 53 ReorderingBuffer buffer(impl, dest); 54 if(buffer.init(src.length(), errorCode)) { 55 normalize(sArray, sArray+src.length(), buffer, errorCode); 56 } 57 return dest; 58 } 59 virtual void 60 normalize(const UChar *src, const UChar *limit, 61 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 62 63 // normalize and append 64 virtual UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)65 normalizeSecondAndAppend(UnicodeString &first, 66 const UnicodeString &second, 67 UErrorCode &errorCode) const U_OVERRIDE { 68 return normalizeSecondAndAppend(first, second, true, errorCode); 69 } 70 virtual UnicodeString & append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)71 append(UnicodeString &first, 72 const UnicodeString &second, 73 UErrorCode &errorCode) const U_OVERRIDE { 74 return normalizeSecondAndAppend(first, second, false, errorCode); 75 } 76 UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)77 normalizeSecondAndAppend(UnicodeString &first, 78 const UnicodeString &second, 79 UBool doNormalize, 80 UErrorCode &errorCode) const { 81 uprv_checkCanGetBuffer(first, errorCode); 82 if(U_FAILURE(errorCode)) { 83 return first; 84 } 85 const UChar *secondArray=second.getBuffer(); 86 if(&first==&second || secondArray==NULL) { 87 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 88 return first; 89 } 90 int32_t firstLength=first.length(); 91 UnicodeString safeMiddle; 92 { 93 ReorderingBuffer buffer(impl, first); 94 if(buffer.init(firstLength+second.length(), errorCode)) { 95 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 96 safeMiddle, buffer, errorCode); 97 } 98 } // The ReorderingBuffer destructor finalizes the first string. 99 if(U_FAILURE(errorCode)) { 100 // Restore the modified suffix of the first string. 101 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 102 } 103 return first; 104 } 105 virtual void 106 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 107 UnicodeString &safeMiddle, 108 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 109 virtual UBool getDecomposition(UChar32 c,UnicodeString & decomposition)110 getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE { 111 UChar buffer[4]; 112 int32_t length; 113 const UChar *d=impl.getDecomposition(c, buffer, length); 114 if(d==NULL) { 115 return false; 116 } 117 if(d==buffer) { 118 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 119 } else { 120 decomposition.setTo(false, d, length); // read-only alias 121 } 122 return true; 123 } 124 virtual UBool getRawDecomposition(UChar32 c,UnicodeString & decomposition)125 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE { 126 UChar buffer[30]; 127 int32_t length; 128 const UChar *d=impl.getRawDecomposition(c, buffer, length); 129 if(d==NULL) { 130 return false; 131 } 132 if(d==buffer) { 133 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 134 } else { 135 decomposition.setTo(false, d, length); // read-only alias 136 } 137 return true; 138 } 139 virtual UChar32 composePair(UChar32 a,UChar32 b)140 composePair(UChar32 a, UChar32 b) const U_OVERRIDE { 141 return impl.composePair(a, b); 142 } 143 144 virtual uint8_t getCombiningClass(UChar32 c)145 getCombiningClass(UChar32 c) const U_OVERRIDE { 146 return impl.getCC(impl.getNorm16(c)); 147 } 148 149 // quick checks 150 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)151 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 152 if(U_FAILURE(errorCode)) { 153 return false; 154 } 155 const UChar *sArray=s.getBuffer(); 156 if(sArray==NULL) { 157 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 158 return false; 159 } 160 const UChar *sLimit=sArray+s.length(); 161 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 162 } 163 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)164 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 165 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 166 } 167 virtual int32_t spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)168 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 169 if(U_FAILURE(errorCode)) { 170 return 0; 171 } 172 const UChar *sArray=s.getBuffer(); 173 if(sArray==NULL) { 174 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 175 return 0; 176 } 177 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 178 } 179 virtual const UChar * 180 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 181 getQuickCheck(UChar32)182 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 183 return UNORM_YES; 184 } 185 186 const Normalizer2Impl &impl; 187 }; 188 189 class DecomposeNormalizer2 : public Normalizer2WithImpl { 190 public: DecomposeNormalizer2(const Normalizer2Impl & ni)191 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 192 virtual ~DecomposeNormalizer2(); 193 194 private: 195 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)196 normalize(const UChar *src, const UChar *limit, 197 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 198 impl.decompose(src, limit, &buffer, errorCode); 199 } 200 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 201 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)202 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 203 UnicodeString &safeMiddle, 204 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 205 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 206 } 207 208 void normalizeUTF8(uint32_t options,StringPiece src,ByteSink & sink,Edits * edits,UErrorCode & errorCode)209 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, 210 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { 211 if (U_FAILURE(errorCode)) { 212 return; 213 } 214 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { 215 edits->reset(); 216 } 217 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); 218 impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode); 219 sink.Flush(); 220 } 221 virtual UBool isNormalizedUTF8(StringPiece sp,UErrorCode & errorCode)222 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { 223 if(U_FAILURE(errorCode)) { 224 return false; 225 } 226 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); 227 const uint8_t *sLimit = s + sp.length(); 228 return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode); 229 } 230 231 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)232 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE { 233 return impl.decompose(src, limit, NULL, errorCode); 234 } 235 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)236 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { 237 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 238 } hasBoundaryBefore(UChar32 c)239 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { 240 return impl.hasDecompBoundaryBefore(c); 241 } hasBoundaryAfter(UChar32 c)242 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { 243 return impl.hasDecompBoundaryAfter(c); 244 } isInert(UChar32 c)245 virtual UBool isInert(UChar32 c) const U_OVERRIDE { 246 return impl.isDecompInert(c); 247 } 248 }; 249 250 class ComposeNormalizer2 : public Normalizer2WithImpl { 251 public: ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)252 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 253 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 254 virtual ~ComposeNormalizer2(); 255 256 private: 257 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)258 normalize(const UChar *src, const UChar *limit, 259 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 260 impl.compose(src, limit, onlyContiguous, true, buffer, errorCode); 261 } 262 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 263 264 void normalizeUTF8(uint32_t options,StringPiece src,ByteSink & sink,Edits * edits,UErrorCode & errorCode)265 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, 266 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { 267 if (U_FAILURE(errorCode)) { 268 return; 269 } 270 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { 271 edits->reset(); 272 } 273 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); 274 impl.composeUTF8(options, onlyContiguous, s, s + src.length(), 275 &sink, edits, errorCode); 276 sink.Flush(); 277 } 278 279 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)280 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 281 UnicodeString &safeMiddle, 282 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 283 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 284 } 285 286 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)287 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 288 if(U_FAILURE(errorCode)) { 289 return false; 290 } 291 const UChar *sArray=s.getBuffer(); 292 if(sArray==NULL) { 293 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 294 return false; 295 } 296 UnicodeString temp; 297 ReorderingBuffer buffer(impl, temp); 298 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 299 return false; 300 } 301 return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode); 302 } 303 virtual UBool isNormalizedUTF8(StringPiece sp,UErrorCode & errorCode)304 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { 305 if(U_FAILURE(errorCode)) { 306 return false; 307 } 308 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); 309 return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); 310 } 311 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)312 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 313 if(U_FAILURE(errorCode)) { 314 return UNORM_MAYBE; 315 } 316 const UChar *sArray=s.getBuffer(); 317 if(sArray==NULL) { 318 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 319 return UNORM_MAYBE; 320 } 321 UNormalizationCheckResult qcResult=UNORM_YES; 322 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 323 return qcResult; 324 } 325 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)326 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { 327 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 328 } 329 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)330 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { 331 return impl.getCompQuickCheck(impl.getNorm16(c)); 332 } hasBoundaryBefore(UChar32 c)333 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { 334 return impl.hasCompBoundaryBefore(c); 335 } hasBoundaryAfter(UChar32 c)336 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { 337 return impl.hasCompBoundaryAfter(c, onlyContiguous); 338 } isInert(UChar32 c)339 virtual UBool isInert(UChar32 c) const U_OVERRIDE { 340 return impl.isCompInert(c, onlyContiguous); 341 } 342 343 const UBool onlyContiguous; 344 }; 345 346 class FCDNormalizer2 : public Normalizer2WithImpl { 347 public: FCDNormalizer2(const Normalizer2Impl & ni)348 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 349 virtual ~FCDNormalizer2(); 350 351 private: 352 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)353 normalize(const UChar *src, const UChar *limit, 354 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 355 impl.makeFCD(src, limit, &buffer, errorCode); 356 } 357 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 358 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)359 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 360 UnicodeString &safeMiddle, 361 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 362 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 363 } 364 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)365 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE { 366 return impl.makeFCD(src, limit, NULL, errorCode); 367 } 368 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. hasBoundaryBefore(UChar32 c)369 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { 370 return impl.hasFCDBoundaryBefore(c); 371 } hasBoundaryAfter(UChar32 c)372 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { 373 return impl.hasFCDBoundaryAfter(c); 374 } isInert(UChar32 c)375 virtual UBool isInert(UChar32 c) const U_OVERRIDE { 376 return impl.isFCDInert(c); 377 } 378 }; 379 380 struct Norm2AllModes : public UMemory { Norm2AllModesNorm2AllModes381 Norm2AllModes(Normalizer2Impl *i) 382 : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {} 383 ~Norm2AllModes(); 384 385 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); 386 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); 387 static Norm2AllModes *createInstance(const char *packageName, 388 const char *name, 389 UErrorCode &errorCode); 390 391 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); 392 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); 393 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); 394 395 Normalizer2Impl *impl; 396 ComposeNormalizer2 comp; 397 DecomposeNormalizer2 decomp; 398 FCDNormalizer2 fcd; 399 ComposeNormalizer2 fcc; 400 }; 401 402 U_NAMESPACE_END 403 404 #endif // !UCONFIG_NO_NORMALIZATION 405 #endif // __NORM2ALLMODES_H__ 406