1 /* 2 ******************************************************************************* 3 * Copyright (C) 2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * loadednormalizer2impl.h 7 * 8 * created on: 2014sep07 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __NORM2ALLMODES_H__ 13 #define __NORM2ALLMODES_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_NORMALIZATION 18 19 #include "unicode/normalizer2.h" 20 #include "unicode/unistr.h" 21 #include "cpputils.h" 22 #include "normalizer2impl.h" 23 24 U_NAMESPACE_BEGIN 25 26 // Intermediate class: 27 // Has Normalizer2Impl and does boilerplate argument checking and setup. 28 class Normalizer2WithImpl : public Normalizer2 { 29 public: Normalizer2WithImpl(const Normalizer2Impl & ni)30 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 31 virtual ~Normalizer2WithImpl(); 32 33 // normalize 34 virtual UnicodeString & normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)35 normalize(const UnicodeString &src, 36 UnicodeString &dest, 37 UErrorCode &errorCode) const { 38 if(U_FAILURE(errorCode)) { 39 dest.setToBogus(); 40 return dest; 41 } 42 const UChar *sArray=src.getBuffer(); 43 if(&dest==&src || sArray==NULL) { 44 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 45 dest.setToBogus(); 46 return dest; 47 } 48 dest.remove(); 49 ReorderingBuffer buffer(impl, dest); 50 if(buffer.init(src.length(), errorCode)) { 51 normalize(sArray, sArray+src.length(), buffer, errorCode); 52 } 53 return dest; 54 } 55 virtual void 56 normalize(const UChar *src, const UChar *limit, 57 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 58 59 // normalize and append 60 virtual UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)61 normalizeSecondAndAppend(UnicodeString &first, 62 const UnicodeString &second, 63 UErrorCode &errorCode) const { 64 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 65 } 66 virtual UnicodeString & append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)67 append(UnicodeString &first, 68 const UnicodeString &second, 69 UErrorCode &errorCode) const { 70 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 71 } 72 UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)73 normalizeSecondAndAppend(UnicodeString &first, 74 const UnicodeString &second, 75 UBool doNormalize, 76 UErrorCode &errorCode) const { 77 uprv_checkCanGetBuffer(first, errorCode); 78 if(U_FAILURE(errorCode)) { 79 return first; 80 } 81 const UChar *secondArray=second.getBuffer(); 82 if(&first==&second || secondArray==NULL) { 83 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 84 return first; 85 } 86 int32_t firstLength=first.length(); 87 UnicodeString safeMiddle; 88 { 89 ReorderingBuffer buffer(impl, first); 90 if(buffer.init(firstLength+second.length(), errorCode)) { 91 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 92 safeMiddle, buffer, errorCode); 93 } 94 } // The ReorderingBuffer destructor finalizes the first string. 95 if(U_FAILURE(errorCode)) { 96 // Restore the modified suffix of the first string. 97 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 98 } 99 return first; 100 } 101 virtual void 102 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 103 UnicodeString &safeMiddle, 104 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 105 virtual UBool getDecomposition(UChar32 c,UnicodeString & decomposition)106 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 107 UChar buffer[4]; 108 int32_t length; 109 const UChar *d=impl.getDecomposition(c, buffer, length); 110 if(d==NULL) { 111 return FALSE; 112 } 113 if(d==buffer) { 114 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 115 } else { 116 decomposition.setTo(FALSE, d, length); // read-only alias 117 } 118 return TRUE; 119 } 120 virtual UBool getRawDecomposition(UChar32 c,UnicodeString & decomposition)121 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 122 UChar buffer[30]; 123 int32_t length; 124 const UChar *d=impl.getRawDecomposition(c, buffer, length); 125 if(d==NULL) { 126 return FALSE; 127 } 128 if(d==buffer) { 129 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 130 } else { 131 decomposition.setTo(FALSE, d, length); // read-only alias 132 } 133 return TRUE; 134 } 135 virtual UChar32 composePair(UChar32 a,UChar32 b)136 composePair(UChar32 a, UChar32 b) const { 137 return impl.composePair(a, b); 138 } 139 140 virtual uint8_t getCombiningClass(UChar32 c)141 getCombiningClass(UChar32 c) const { 142 return impl.getCC(impl.getNorm16(c)); 143 } 144 145 // quick checks 146 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)147 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 148 if(U_FAILURE(errorCode)) { 149 return FALSE; 150 } 151 const UChar *sArray=s.getBuffer(); 152 if(sArray==NULL) { 153 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 154 return FALSE; 155 } 156 const UChar *sLimit=sArray+s.length(); 157 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 158 } 159 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)160 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 161 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 162 } 163 virtual int32_t spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)164 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 165 if(U_FAILURE(errorCode)) { 166 return 0; 167 } 168 const UChar *sArray=s.getBuffer(); 169 if(sArray==NULL) { 170 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 171 return 0; 172 } 173 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 174 } 175 virtual const UChar * 176 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 177 getQuickCheck(UChar32)178 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 179 return UNORM_YES; 180 } 181 182 const Normalizer2Impl &impl; 183 }; 184 185 class DecomposeNormalizer2 : public Normalizer2WithImpl { 186 public: DecomposeNormalizer2(const Normalizer2Impl & ni)187 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 188 virtual ~DecomposeNormalizer2(); 189 190 private: 191 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)192 normalize(const UChar *src, const UChar *limit, 193 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 194 impl.decompose(src, limit, &buffer, errorCode); 195 } 196 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 197 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)198 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 199 UnicodeString &safeMiddle, 200 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 201 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 202 } 203 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)204 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 205 return impl.decompose(src, limit, NULL, errorCode); 206 } 207 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)208 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 209 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 210 } hasBoundaryBefore(UChar32 c)211 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } hasBoundaryAfter(UChar32 c)212 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } isInert(UChar32 c)213 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 214 }; 215 216 class ComposeNormalizer2 : public Normalizer2WithImpl { 217 public: ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)218 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 219 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 220 virtual ~ComposeNormalizer2(); 221 222 private: 223 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)224 normalize(const UChar *src, const UChar *limit, 225 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 226 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 227 } 228 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 229 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)230 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 231 UnicodeString &safeMiddle, 232 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 233 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 234 } 235 236 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)237 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 238 if(U_FAILURE(errorCode)) { 239 return FALSE; 240 } 241 const UChar *sArray=s.getBuffer(); 242 if(sArray==NULL) { 243 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 244 return FALSE; 245 } 246 UnicodeString temp; 247 ReorderingBuffer buffer(impl, temp); 248 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 249 return FALSE; 250 } 251 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 252 } 253 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)254 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 255 if(U_FAILURE(errorCode)) { 256 return UNORM_MAYBE; 257 } 258 const UChar *sArray=s.getBuffer(); 259 if(sArray==NULL) { 260 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 261 return UNORM_MAYBE; 262 } 263 UNormalizationCheckResult qcResult=UNORM_YES; 264 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 265 return qcResult; 266 } 267 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)268 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 269 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 270 } 271 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)272 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 273 return impl.getCompQuickCheck(impl.getNorm16(c)); 274 } hasBoundaryBefore(UChar32 c)275 virtual UBool hasBoundaryBefore(UChar32 c) const { 276 return impl.hasCompBoundaryBefore(c); 277 } hasBoundaryAfter(UChar32 c)278 virtual UBool hasBoundaryAfter(UChar32 c) const { 279 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 280 } isInert(UChar32 c)281 virtual UBool isInert(UChar32 c) const { 282 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 283 } 284 285 const UBool onlyContiguous; 286 }; 287 288 class FCDNormalizer2 : public Normalizer2WithImpl { 289 public: FCDNormalizer2(const Normalizer2Impl & ni)290 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 291 virtual ~FCDNormalizer2(); 292 293 private: 294 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)295 normalize(const UChar *src, const UChar *limit, 296 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 297 impl.makeFCD(src, limit, &buffer, errorCode); 298 } 299 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 300 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)301 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 302 UnicodeString &safeMiddle, 303 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 304 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 305 } 306 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)307 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 308 return impl.makeFCD(src, limit, NULL, errorCode); 309 } 310 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. hasBoundaryBefore(UChar32 c)311 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } hasBoundaryAfter(UChar32 c)312 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } isInert(UChar32 c)313 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 314 }; 315 316 struct Norm2AllModes : public UMemory { Norm2AllModesNorm2AllModes317 Norm2AllModes(Normalizer2Impl *i) 318 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} 319 ~Norm2AllModes(); 320 321 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); 322 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); 323 static Norm2AllModes *createInstance(const char *packageName, 324 const char *name, 325 UErrorCode &errorCode); 326 327 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); 328 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); 329 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); 330 331 Normalizer2Impl *impl; 332 ComposeNormalizer2 comp; 333 DecomposeNormalizer2 decomp; 334 FCDNormalizer2 fcd; 335 ComposeNormalizer2 fcc; 336 }; 337 338 U_NAMESPACE_END 339 340 #endif // !UCONFIG_NO_NORMALIZATION 341 #endif // __NORM2ALLMODES_H__ 342