1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * norm2allmodes.h 9 * 10 * created on: 2014sep07 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __NORM2ALLMODES_H__ 15 #define __NORM2ALLMODES_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/edits.h" 22 #include "unicode/normalizer2.h" 23 #include "unicode/stringoptions.h" 24 #include "unicode/unistr.h" 25 #include "cpputils.h" 26 #include "normalizer2impl.h" 27 28 U_NAMESPACE_BEGIN 29 30 // Intermediate class: 31 // Has Normalizer2Impl and does boilerplate argument checking and setup. 32 class Normalizer2WithImpl : public Normalizer2 { 33 public: Normalizer2WithImpl(const Normalizer2Impl & ni)34 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 35 virtual ~Normalizer2WithImpl(); 36 37 // normalize 38 virtual UnicodeString & normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)39 normalize(const UnicodeString &src, 40 UnicodeString &dest, 41 UErrorCode &errorCode) const { 42 if(U_FAILURE(errorCode)) { 43 dest.setToBogus(); 44 return dest; 45 } 46 const UChar *sArray=src.getBuffer(); 47 if(&dest==&src || sArray==NULL) { 48 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 49 dest.setToBogus(); 50 return dest; 51 } 52 dest.remove(); 53 ReorderingBuffer buffer(impl, dest); 54 if(buffer.init(src.length(), errorCode)) { 55 normalize(sArray, sArray+src.length(), buffer, errorCode); 56 } 57 return dest; 58 } 59 virtual void 60 normalize(const UChar *src, const UChar *limit, 61 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 62 63 // normalize and append 64 virtual UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)65 normalizeSecondAndAppend(UnicodeString &first, 66 const UnicodeString &second, 67 UErrorCode &errorCode) const { 68 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 69 } 70 virtual UnicodeString & append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)71 append(UnicodeString &first, 72 const UnicodeString &second, 73 UErrorCode &errorCode) const { 74 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 75 } 76 UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)77 normalizeSecondAndAppend(UnicodeString &first, 78 const UnicodeString &second, 79 UBool doNormalize, 80 UErrorCode &errorCode) const { 81 uprv_checkCanGetBuffer(first, errorCode); 82 if(U_FAILURE(errorCode)) { 83 return first; 84 } 85 const UChar *secondArray=second.getBuffer(); 86 if(&first==&second || secondArray==NULL) { 87 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 88 return first; 89 } 90 int32_t firstLength=first.length(); 91 UnicodeString safeMiddle; 92 { 93 ReorderingBuffer buffer(impl, first); 94 if(buffer.init(firstLength+second.length(), errorCode)) { 95 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 96 safeMiddle, buffer, errorCode); 97 } 98 } // The ReorderingBuffer destructor finalizes the first string. 99 if(U_FAILURE(errorCode)) { 100 // Restore the modified suffix of the first string. 101 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 102 } 103 return first; 104 } 105 virtual void 106 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 107 UnicodeString &safeMiddle, 108 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 109 virtual UBool getDecomposition(UChar32 c,UnicodeString & decomposition)110 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 111 UChar buffer[4]; 112 int32_t length; 113 const UChar *d=impl.getDecomposition(c, buffer, length); 114 if(d==NULL) { 115 return FALSE; 116 } 117 if(d==buffer) { 118 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 119 } else { 120 decomposition.setTo(FALSE, d, length); // read-only alias 121 } 122 return TRUE; 123 } 124 virtual UBool getRawDecomposition(UChar32 c,UnicodeString & decomposition)125 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 126 UChar buffer[30]; 127 int32_t length; 128 const UChar *d=impl.getRawDecomposition(c, buffer, length); 129 if(d==NULL) { 130 return FALSE; 131 } 132 if(d==buffer) { 133 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 134 } else { 135 decomposition.setTo(FALSE, d, length); // read-only alias 136 } 137 return TRUE; 138 } 139 virtual UChar32 composePair(UChar32 a,UChar32 b)140 composePair(UChar32 a, UChar32 b) const { 141 return impl.composePair(a, b); 142 } 143 144 virtual uint8_t getCombiningClass(UChar32 c)145 getCombiningClass(UChar32 c) const { 146 return impl.getCC(impl.getNorm16(c)); 147 } 148 149 // quick checks 150 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)151 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 152 if(U_FAILURE(errorCode)) { 153 return FALSE; 154 } 155 const UChar *sArray=s.getBuffer(); 156 if(sArray==NULL) { 157 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 158 return FALSE; 159 } 160 const UChar *sLimit=sArray+s.length(); 161 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 162 } 163 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)164 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 165 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 166 } 167 virtual int32_t spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)168 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 169 if(U_FAILURE(errorCode)) { 170 return 0; 171 } 172 const UChar *sArray=s.getBuffer(); 173 if(sArray==NULL) { 174 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 175 return 0; 176 } 177 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 178 } 179 virtual const UChar * 180 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 181 getQuickCheck(UChar32)182 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 183 return UNORM_YES; 184 } 185 186 const Normalizer2Impl &impl; 187 }; 188 189 class DecomposeNormalizer2 : public Normalizer2WithImpl { 190 public: DecomposeNormalizer2(const Normalizer2Impl & ni)191 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 192 virtual ~DecomposeNormalizer2(); 193 194 private: 195 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)196 normalize(const UChar *src, const UChar *limit, 197 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 198 impl.decompose(src, limit, &buffer, errorCode); 199 } 200 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 201 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)202 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 203 UnicodeString &safeMiddle, 204 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 205 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 206 } 207 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)208 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 209 return impl.decompose(src, limit, NULL, errorCode); 210 } 211 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)212 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 213 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 214 } hasBoundaryBefore(UChar32 c)215 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } hasBoundaryAfter(UChar32 c)216 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } isInert(UChar32 c)217 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 218 }; 219 220 class ComposeNormalizer2 : public Normalizer2WithImpl { 221 public: ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)222 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 223 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 224 virtual ~ComposeNormalizer2(); 225 226 private: 227 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)228 normalize(const UChar *src, const UChar *limit, 229 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 230 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 231 } 232 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 233 234 void normalizeUTF8(uint32_t options,StringPiece src,ByteSink & sink,Edits * edits,UErrorCode & errorCode)235 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, 236 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { 237 if (U_FAILURE(errorCode)) { 238 return; 239 } 240 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { 241 edits->reset(); 242 } 243 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); 244 impl.composeUTF8(options, onlyContiguous, s, s + src.length(), 245 &sink, edits, errorCode); 246 sink.Flush(); 247 } 248 249 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)250 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 251 UnicodeString &safeMiddle, 252 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { 253 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 254 } 255 256 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)257 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 258 if(U_FAILURE(errorCode)) { 259 return FALSE; 260 } 261 const UChar *sArray=s.getBuffer(); 262 if(sArray==NULL) { 263 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 264 return FALSE; 265 } 266 UnicodeString temp; 267 ReorderingBuffer buffer(impl, temp); 268 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 269 return FALSE; 270 } 271 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 272 } 273 virtual UBool isNormalizedUTF8(StringPiece sp,UErrorCode & errorCode)274 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { 275 if(U_FAILURE(errorCode)) { 276 return FALSE; 277 } 278 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); 279 return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); 280 } 281 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)282 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { 283 if(U_FAILURE(errorCode)) { 284 return UNORM_MAYBE; 285 } 286 const UChar *sArray=s.getBuffer(); 287 if(sArray==NULL) { 288 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 289 return UNORM_MAYBE; 290 } 291 UNormalizationCheckResult qcResult=UNORM_YES; 292 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 293 return qcResult; 294 } 295 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)296 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { 297 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 298 } 299 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)300 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { 301 return impl.getCompQuickCheck(impl.getNorm16(c)); 302 } hasBoundaryBefore(UChar32 c)303 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { 304 return impl.hasCompBoundaryBefore(c); 305 } hasBoundaryAfter(UChar32 c)306 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { 307 return impl.hasCompBoundaryAfter(c, onlyContiguous); 308 } isInert(UChar32 c)309 virtual UBool isInert(UChar32 c) const U_OVERRIDE { 310 return impl.isCompInert(c, onlyContiguous); 311 } 312 313 const UBool onlyContiguous; 314 }; 315 316 class FCDNormalizer2 : public Normalizer2WithImpl { 317 public: FCDNormalizer2(const Normalizer2Impl & ni)318 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 319 virtual ~FCDNormalizer2(); 320 321 private: 322 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)323 normalize(const UChar *src, const UChar *limit, 324 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 325 impl.makeFCD(src, limit, &buffer, errorCode); 326 } 327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 328 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 330 UnicodeString &safeMiddle, 331 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 332 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 333 } 334 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)335 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 336 return impl.makeFCD(src, limit, NULL, errorCode); 337 } 338 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. hasBoundaryBefore(UChar32 c)339 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } hasBoundaryAfter(UChar32 c)340 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } isInert(UChar32 c)341 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 342 }; 343 344 struct Norm2AllModes : public UMemory { Norm2AllModesNorm2AllModes345 Norm2AllModes(Normalizer2Impl *i) 346 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} 347 ~Norm2AllModes(); 348 349 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); 350 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); 351 static Norm2AllModes *createInstance(const char *packageName, 352 const char *name, 353 UErrorCode &errorCode); 354 355 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); 356 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); 357 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); 358 359 Normalizer2Impl *impl; 360 ComposeNormalizer2 comp; 361 DecomposeNormalizer2 decomp; 362 FCDNormalizer2 fcd; 363 ComposeNormalizer2 fcc; 364 }; 365 366 U_NAMESPACE_END 367 368 #endif // !UCONFIG_NO_NORMALIZATION 369 #endif // __NORM2ALLMODES_H__ 370