• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2009-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  normalizer2.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2009nov22
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35 
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39     normalize(const UnicodeString &src,
40               UnicodeString &dest,
41               UErrorCode &errorCode) const {
42         if(U_SUCCESS(errorCode)) {
43             if(&dest!=&src) {
44                 dest=src;
45             } else {
46                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47             }
48         }
49         return dest;
50     }
51     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52     normalizeSecondAndAppend(UnicodeString &first,
53                              const UnicodeString &second,
54                              UErrorCode &errorCode) const {
55         if(U_SUCCESS(errorCode)) {
56             if(&first!=&second) {
57                 first.append(second);
58             } else {
59                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60             }
61         }
62         return first;
63     }
64     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65     append(UnicodeString &first,
66            const UnicodeString &second,
67            UErrorCode &errorCode) const {
68         if(U_SUCCESS(errorCode)) {
69             if(&first!=&second) {
70                 first.append(second);
71             } else {
72                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73             }
74         }
75         return first;
76     }
77     virtual UBool
getDecomposition(UChar32,UnicodeString &) const78     getDecomposition(UChar32, UnicodeString &) const {
79         return FALSE;
80     }
81     virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const82     isNormalized(const UnicodeString &, UErrorCode &) const {
83         return TRUE;
84     }
85     virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const86     quickCheck(const UnicodeString &, UErrorCode &) const {
87         return UNORM_YES;
88     }
89     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const90     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
91         return s.length();
92     }
hasBoundaryBefore(UChar32) const93     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const94     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const95     virtual UBool isInert(UChar32) const { return TRUE; }
96 };
97 
98 // Intermediate class:
99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
100 class Normalizer2WithImpl : public Normalizer2 {
101 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)102     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
103 
104     // normalize
105     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const106     normalize(const UnicodeString &src,
107               UnicodeString &dest,
108               UErrorCode &errorCode) const {
109         if(U_FAILURE(errorCode)) {
110             dest.setToBogus();
111             return dest;
112         }
113         const UChar *sArray=src.getBuffer();
114         if(&dest==&src || sArray==NULL) {
115             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
116             dest.setToBogus();
117             return dest;
118         }
119         dest.remove();
120         ReorderingBuffer buffer(impl, dest);
121         if(buffer.init(src.length(), errorCode)) {
122             normalize(sArray, sArray+src.length(), buffer, errorCode);
123         }
124         return dest;
125     }
126     virtual void
127     normalize(const UChar *src, const UChar *limit,
128               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
129 
130     // normalize and append
131     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const132     normalizeSecondAndAppend(UnicodeString &first,
133                              const UnicodeString &second,
134                              UErrorCode &errorCode) const {
135         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
136     }
137     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const138     append(UnicodeString &first,
139            const UnicodeString &second,
140            UErrorCode &errorCode) const {
141         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
142     }
143     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const144     normalizeSecondAndAppend(UnicodeString &first,
145                              const UnicodeString &second,
146                              UBool doNormalize,
147                              UErrorCode &errorCode) const {
148         uprv_checkCanGetBuffer(first, errorCode);
149         if(U_FAILURE(errorCode)) {
150             return first;
151         }
152         const UChar *secondArray=second.getBuffer();
153         if(&first==&second || secondArray==NULL) {
154             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
155             return first;
156         }
157         int32_t firstLength=first.length();
158         UnicodeString safeMiddle;
159         {
160             ReorderingBuffer buffer(impl, first);
161             if(buffer.init(firstLength+second.length(), errorCode)) {
162                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
163                                    safeMiddle, buffer, errorCode);
164             }
165         }  // The ReorderingBuffer destructor finalizes the first string.
166         if(U_FAILURE(errorCode)) {
167             // Restore the modified suffix of the first string.
168             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
169         }
170         return first;
171     }
172     virtual void
173     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
174                        UnicodeString &safeMiddle,
175                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
176     virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition) const177     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
178         UChar buffer[4];
179         int32_t length;
180         const UChar *d=impl.getDecomposition(c, buffer, length);
181         if(d==NULL) {
182             return FALSE;
183         }
184         if(d==buffer) {
185             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
186         } else {
187             decomposition.setTo(FALSE, d, length);  // read-only alias
188         }
189         return TRUE;
190     }
191 
192     // quick checks
193     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const194     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
195         if(U_FAILURE(errorCode)) {
196             return FALSE;
197         }
198         const UChar *sArray=s.getBuffer();
199         if(sArray==NULL) {
200             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
201             return FALSE;
202         }
203         const UChar *sLimit=sArray+s.length();
204         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
205     }
206     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const207     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
208         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
209     }
210     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const211     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
212         if(U_FAILURE(errorCode)) {
213             return 0;
214         }
215         const UChar *sArray=s.getBuffer();
216         if(sArray==NULL) {
217             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
218             return 0;
219         }
220         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
221     }
222     virtual const UChar *
223     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
224 
getQuickCheck(UChar32) const225     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
226         return UNORM_YES;
227     }
228 
229     const Normalizer2Impl &impl;
230 };
231 
232 class DecomposeNormalizer2 : public Normalizer2WithImpl {
233 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)234     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
235 
236 private:
237     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const238     normalize(const UChar *src, const UChar *limit,
239               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
240         impl.decompose(src, limit, &buffer, errorCode);
241     }
242     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
243     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const244     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
245                        UnicodeString &safeMiddle,
246                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
247         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
248     }
249     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const250     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
251         return impl.decompose(src, limit, NULL, errorCode);
252     }
253     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const254     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
255         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
256     }
hasBoundaryBefore(UChar32 c) const257     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const258     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const259     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
260 };
261 
262 class ComposeNormalizer2 : public Normalizer2WithImpl {
263 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)264     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
265         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
266 
267 private:
268     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const269     normalize(const UChar *src, const UChar *limit,
270               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
271         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
272     }
273     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
274     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const275     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
276                        UnicodeString &safeMiddle,
277                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
278         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
279     }
280 
281     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const282     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
283         if(U_FAILURE(errorCode)) {
284             return FALSE;
285         }
286         const UChar *sArray=s.getBuffer();
287         if(sArray==NULL) {
288             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
289             return FALSE;
290         }
291         UnicodeString temp;
292         ReorderingBuffer buffer(impl, temp);
293         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
294             return FALSE;
295         }
296         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
297     }
298     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const299     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
300         if(U_FAILURE(errorCode)) {
301             return UNORM_MAYBE;
302         }
303         const UChar *sArray=s.getBuffer();
304         if(sArray==NULL) {
305             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
306             return UNORM_MAYBE;
307         }
308         UNormalizationCheckResult qcResult=UNORM_YES;
309         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
310         return qcResult;
311     }
312     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const313     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
314         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
315     }
316     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const317     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
318         return impl.getCompQuickCheck(impl.getNorm16(c));
319     }
hasBoundaryBefore(UChar32 c) const320     virtual UBool hasBoundaryBefore(UChar32 c) const {
321         return impl.hasCompBoundaryBefore(c);
322     }
hasBoundaryAfter(UChar32 c) const323     virtual UBool hasBoundaryAfter(UChar32 c) const {
324         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
325     }
isInert(UChar32 c) const326     virtual UBool isInert(UChar32 c) const {
327         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
328     }
329 
330     const UBool onlyContiguous;
331 };
332 
333 class FCDNormalizer2 : public Normalizer2WithImpl {
334 public:
FCDNormalizer2(const Normalizer2Impl & ni)335     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
336 
337 private:
338     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const339     normalize(const UChar *src, const UChar *limit,
340               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
341         impl.makeFCD(src, limit, &buffer, errorCode);
342     }
343     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
344     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const345     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
346                        UnicodeString &safeMiddle,
347                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
348         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
349     }
350     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const351     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
352         return impl.makeFCD(src, limit, NULL, errorCode);
353     }
354     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const355     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const356     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const357     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
358 };
359 
360 // instance cache ---------------------------------------------------------- ***
361 
362 struct Norm2AllModes : public UMemory {
363     static Norm2AllModes *createInstance(const char *packageName,
364                                          const char *name,
365                                          UErrorCode &errorCode);
Norm2AllModesNorm2AllModes366     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
367 
368     Normalizer2Impl impl;
369     ComposeNormalizer2 comp;
370     DecomposeNormalizer2 decomp;
371     FCDNormalizer2 fcd;
372     ComposeNormalizer2 fcc;
373 };
374 
375 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)376 Norm2AllModes::createInstance(const char *packageName,
377                               const char *name,
378                               UErrorCode &errorCode) {
379     if(U_FAILURE(errorCode)) {
380         return NULL;
381     }
382     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
383     if(allModes.isNull()) {
384         errorCode=U_MEMORY_ALLOCATION_ERROR;
385         return NULL;
386     }
387     allModes->impl.load(packageName, name, errorCode);
388     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
389 }
390 
// Forward declaration: the singleton createInstance() functions below pass
// uprv_normalizer2_cleanup to ucln_common_registerCleanup() before the
// function is defined further down in this file.
391 U_CDECL_BEGIN
392 static UBool U_CALLCONV uprv_normalizer2_cleanup();
393 U_CDECL_END
394 
395 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
396 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)397     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
398         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)399     Norm2AllModes *getInstance(UErrorCode &errorCode) {
400         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
401     }
402 private:
createInstance(const void * context,UErrorCode & errorCode)403     static void *createInstance(const void *context, UErrorCode &errorCode) {
404         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
405         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
406     }
407 
408     const char *name;
409 };
410 
// Singleton storage used via Norm2AllModesSingleton for the Norm2AllModes
// instances of the "nfc", "nfkc" and "nfkc_cf" data files, respectively.
411 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
412 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
413 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
414 
415 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
416 public:
Norm2Singleton(SimpleSingleton & s)417     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)418     Normalizer2 *getInstance(UErrorCode &errorCode) {
419         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
420     }
421 private:
createInstance(const void *,UErrorCode & errorCode)422     static void *createInstance(const void *, UErrorCode &errorCode) {
423         Normalizer2 *noop=new NoopNormalizer2;
424         if(noop==NULL) {
425             errorCode=U_MEMORY_ALLOCATION_ERROR;
426         }
427         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
428         return noop;
429     }
430 };
431 
// Singleton storage used via Norm2Singleton for the no-op normalizer.
432 STATIC_SIMPLE_SINGLETON(noopSingleton);
433 
// Hashtable from data file name to Norm2AllModes.
// It is opened on first use in Normalizer2::getInstance()
// and closed in uprv_normalizer2_cleanup().
434 static UHashtable *cache=NULL;
435 
436 U_CDECL_BEGIN
437 
deleteNorm2AllModes(void * allModes)438 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
439     delete (Norm2AllModes *)allModes;
440 }
441 
uprv_normalizer2_cleanup()442 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
443     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
444     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
445     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
446     Norm2Singleton(noopSingleton).deleteInstance();
447     uhash_close(cache);
448     cache=NULL;
449     return TRUE;
450 }
451 
452 U_CDECL_END
453 
getNFCInstance(UErrorCode & errorCode)454 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
455     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
456     return allModes!=NULL ? &allModes->comp : NULL;
457 }
458 
getNFDInstance(UErrorCode & errorCode)459 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
460     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
461     return allModes!=NULL ? &allModes->decomp : NULL;
462 }
463 
getFCDInstance(UErrorCode & errorCode)464 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
465     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
466     if(allModes!=NULL) {
467         allModes->impl.getFCDTrie(errorCode);
468         return &allModes->fcd;
469     } else {
470         return NULL;
471     }
472 }
473 
getFCCInstance(UErrorCode & errorCode)474 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
475     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
476     return allModes!=NULL ? &allModes->fcc : NULL;
477 }
478 
getNFKCInstance(UErrorCode & errorCode)479 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
480     Norm2AllModes *allModes=
481         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
482     return allModes!=NULL ? &allModes->comp : NULL;
483 }
484 
getNFKDInstance(UErrorCode & errorCode)485 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
486     Norm2AllModes *allModes=
487         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
488     return allModes!=NULL ? &allModes->decomp : NULL;
489 }
490 
getNFKC_CFInstance(UErrorCode & errorCode)491 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
492     Norm2AllModes *allModes=
493         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
494     return allModes!=NULL ? &allModes->comp : NULL;
495 }
496 
getNoopInstance(UErrorCode & errorCode)497 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
498     return Norm2Singleton(noopSingleton).getInstance(errorCode);
499 }
500 
501 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)502 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
503     if(U_FAILURE(errorCode)) {
504         return NULL;
505     }
506     switch(mode) {
507     case UNORM_NFD:
508         return getNFDInstance(errorCode);
509     case UNORM_NFKD:
510         return getNFKDInstance(errorCode);
511     case UNORM_NFC:
512         return getNFCInstance(errorCode);
513     case UNORM_NFKC:
514         return getNFKCInstance(errorCode);
515     case UNORM_FCD:
516         return getFCDInstance(errorCode);
517     default:  // UNORM_NONE
518         return getNoopInstance(errorCode);
519     }
520 }
521 
522 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)523 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
524     Norm2AllModes *allModes=
525         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
526     return allModes!=NULL ? &allModes->impl : NULL;
527 }
528 
529 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)530 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
531     Norm2AllModes *allModes=
532         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
533     return allModes!=NULL ? &allModes->impl : NULL;
534 }
535 
536 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)537 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
538     Norm2AllModes *allModes=
539         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
540     return allModes!=NULL ? &allModes->impl : NULL;
541 }
542 
543 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)544 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
545     return &((Normalizer2WithImpl *)norm2)->impl;
546 }
547 
548 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)549 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
550     Norm2AllModes *allModes=
551         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
552     if(allModes!=NULL) {
553         return allModes->impl.getFCDTrie(errorCode);
554     } else {
555         return NULL;
556     }
557 }
558 
559 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)560 Normalizer2::getInstance(const char *packageName,
561                          const char *name,
562                          UNormalization2Mode mode,
563                          UErrorCode &errorCode) {
564     if(U_FAILURE(errorCode)) {
565         return NULL;
566     }
567     if(name==NULL || *name==0) {
568         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
569     }
570     Norm2AllModes *allModes=NULL;
571     if(packageName==NULL) {
572         if(0==uprv_strcmp(name, "nfc")) {
573             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
574         } else if(0==uprv_strcmp(name, "nfkc")) {
575             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
576         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
577             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
578         }
579     }
580     if(allModes==NULL && U_SUCCESS(errorCode)) {
581         {
582             Mutex lock;
583             if(cache!=NULL) {
584                 allModes=(Norm2AllModes *)uhash_get(cache, name);
585             }
586         }
587         if(allModes==NULL) {
588             LocalPointer<Norm2AllModes> localAllModes(
589                 Norm2AllModes::createInstance(packageName, name, errorCode));
590             if(U_SUCCESS(errorCode)) {
591                 Mutex lock;
592                 if(cache==NULL) {
593                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
594                     if(U_FAILURE(errorCode)) {
595                         return NULL;
596                     }
597                     uhash_setKeyDeleter(cache, uprv_free);
598                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
599                 }
600                 void *temp=uhash_get(cache, name);
601                 if(temp==NULL) {
602                     int32_t keyLength=uprv_strlen(name)+1;
603                     char *nameCopy=(char *)uprv_malloc(keyLength);
604                     if(nameCopy==NULL) {
605                         errorCode=U_MEMORY_ALLOCATION_ERROR;
606                         return NULL;
607                     }
608                     uprv_memcpy(nameCopy, name, keyLength);
609                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
610                 } else {
611                     // race condition
612                     allModes=(Norm2AllModes *)temp;
613                 }
614             }
615         }
616     }
617     if(allModes!=NULL && U_SUCCESS(errorCode)) {
618         switch(mode) {
619         case UNORM2_COMPOSE:
620             return &allModes->comp;
621         case UNORM2_DECOMPOSE:
622             return &allModes->decomp;
623         case UNORM2_FCD:
624             allModes->impl.getFCDTrie(errorCode);
625             return &allModes->fcd;
626         case UNORM2_COMPOSE_CONTIGUOUS:
627             return &allModes->fcc;
628         default:
629             break;  // do nothing
630         }
631     }
632     return NULL;
633 }
634 
// NOTE(review): presumably expands to the UObject boilerplate for Normalizer2
// as a class without ICU "poor man's RTTI" -- confirm against the macro definition in uobject.h.
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)635 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
636 
637 U_NAMESPACE_END
638 
639 // C API ------------------------------------------------------------------- ***
640 
641 U_NAMESPACE_USE
642 
643 U_DRAFT const UNormalizer2 * U_EXPORT2
644 unorm2_getInstance(const char *packageName,
645                    const char *name,
646                    UNormalization2Mode mode,
647                    UErrorCode *pErrorCode) {
648     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
649 }
650 
651 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)652 unorm2_close(UNormalizer2 *norm2) {
653     delete (Normalizer2 *)norm2;
654 }
655 
656 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)657 unorm2_normalize(const UNormalizer2 *norm2,
658                  const UChar *src, int32_t length,
659                  UChar *dest, int32_t capacity,
660                  UErrorCode *pErrorCode) {
661     if(U_FAILURE(*pErrorCode)) {
662         return 0;
663     }
664     if( (src==NULL ? length!=0 : length<-1) ||
665         (dest==NULL ? capacity!=0 : capacity<0) ||
666         (src==dest && src!=NULL)
667     ) {
668         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
669         return 0;
670     }
671     UnicodeString destString(dest, 0, capacity);
672     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
673     if(length!=0) {
674         const Normalizer2 *n2=(const Normalizer2 *)norm2;
675         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
676         if(n2wi!=NULL) {
677             // Avoid duplicate argument checking and support NUL-terminated src.
678             ReorderingBuffer buffer(n2wi->impl, destString);
679             if(buffer.init(length, *pErrorCode)) {
680                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
681             }
682         } else {
683             UnicodeString srcString(length<0, src, length);
684             n2->normalize(srcString, destString, *pErrorCode);
685         }
686     }
687     return destString.extract(dest, capacity, *pErrorCode);
688 }
689 
690 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)691 normalizeSecondAndAppend(const UNormalizer2 *norm2,
692                          UChar *first, int32_t firstLength, int32_t firstCapacity,
693                          const UChar *second, int32_t secondLength,
694                          UBool doNormalize,
695                          UErrorCode *pErrorCode) {
696     if(U_FAILURE(*pErrorCode)) {
697         return 0;
698     }
699     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
700         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
701                        (firstCapacity<0 || firstLength<-1)) ||
702         (first==second && first!=NULL)
703     ) {
704         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
705         return 0;
706     }
707     UnicodeString firstString(first, firstLength, firstCapacity);
708     firstLength=firstString.length();  // In case it was -1.
709     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
710     if(secondLength!=0) {
711         const Normalizer2 *n2=(const Normalizer2 *)norm2;
712         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
713         if(n2wi!=NULL) {
714             // Avoid duplicate argument checking and support NUL-terminated src.
715             UnicodeString safeMiddle;
716             {
717                 ReorderingBuffer buffer(n2wi->impl, firstString);
718                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
719                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
720                                              doNormalize, safeMiddle, buffer, *pErrorCode);
721                 }
722             }  // The ReorderingBuffer destructor finalizes firstString.
723             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
724                 // Restore the modified suffix of the first string.
725                 // This does not restore first[] array contents between firstLength and firstCapacity.
726                 // (That might be uninitialized memory, as far as we know.)
727                 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
728                 if(firstLength<firstCapacity) {
729                     first[firstLength]=0;  // NUL-terminate in case it was originally.
730                 }
731             }
732         } else {
733             UnicodeString secondString(secondLength<0, second, secondLength);
734             if(doNormalize) {
735                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
736             } else {
737                 n2->append(firstString, secondString, *pErrorCode);
738             }
739         }
740     }
741     return firstString.extract(first, firstCapacity, *pErrorCode);
742 }
743 
744 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)745 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
746                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
747                                 const UChar *second, int32_t secondLength,
748                                 UErrorCode *pErrorCode) {
749     return normalizeSecondAndAppend(norm2,
750                                     first, firstLength, firstCapacity,
751                                     second, secondLength,
752                                     TRUE, pErrorCode);
753 }
754 
755 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)756 unorm2_append(const UNormalizer2 *norm2,
757               UChar *first, int32_t firstLength, int32_t firstCapacity,
758               const UChar *second, int32_t secondLength,
759               UErrorCode *pErrorCode) {
760     return normalizeSecondAndAppend(norm2,
761                                     first, firstLength, firstCapacity,
762                                     second, secondLength,
763                                     FALSE, pErrorCode);
764 }
765 
766 U_DRAFT int32_t U_EXPORT2
unorm2_getDecomposition(const UNormalizer2 * norm2,UChar32 c,UChar * decomposition,int32_t capacity,UErrorCode * pErrorCode)767 unorm2_getDecomposition(const UNormalizer2 *norm2,
768                         UChar32 c, UChar *decomposition, int32_t capacity,
769                         UErrorCode *pErrorCode) {
770     if(U_FAILURE(*pErrorCode)) {
771         return 0;
772     }
773     if(decomposition==NULL ? capacity!=0 : capacity<0) {
774         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
775         return 0;
776     }
777     UnicodeString destString(decomposition, 0, capacity);
778     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
779         return destString.extract(decomposition, capacity, *pErrorCode);
780     } else {
781         return -1;
782     }
783 }
784 
785 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)786 unorm2_isNormalized(const UNormalizer2 *norm2,
787                     const UChar *s, int32_t length,
788                     UErrorCode *pErrorCode) {
789     if(U_FAILURE(*pErrorCode)) {
790         return 0;
791     }
792     if((s==NULL && length!=0) || length<-1) {
793         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
794         return 0;
795     }
796     UnicodeString sString(length<0, s, length);
797     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
798 }
799 
800 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)801 unorm2_quickCheck(const UNormalizer2 *norm2,
802                   const UChar *s, int32_t length,
803                   UErrorCode *pErrorCode) {
804     if(U_FAILURE(*pErrorCode)) {
805         return UNORM_NO;
806     }
807     if((s==NULL && length!=0) || length<-1) {
808         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
809         return UNORM_NO;
810     }
811     UnicodeString sString(length<0, s, length);
812     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
813 }
814 
815 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)816 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
817                          const UChar *s, int32_t length,
818                          UErrorCode *pErrorCode) {
819     if(U_FAILURE(*pErrorCode)) {
820         return 0;
821     }
822     if((s==NULL && length!=0) || length<-1) {
823         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
824         return 0;
825     }
826     UnicodeString sString(length<0, s, length);
827     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
828 }
829 
830 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)831 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
832     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
833 }
834 
835 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)836 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
837     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
838 }
839 
840 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)841 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
842     return ((const Normalizer2 *)norm2)->isInert(c);
843 }
844 
845 // Some properties APIs ---------------------------------------------------- ***
846 
847 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)848 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
849     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
850         return UNORM_YES;
851     }
852     UErrorCode errorCode=U_ZERO_ERROR;
853     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
854     if(U_SUCCESS(errorCode)) {
855         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
856     } else {
857         return UNORM_MAYBE;
858     }
859 }
860 
861 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)862 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
863     const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
864     if(U_SUCCESS(*pErrorCode)) {
865         fcdHighStart=trie->highStart;
866         return trie->index;
867     } else {
868         return NULL;
869     }
870 }
871 
872 #endif  // !UCONFIG_NO_NORMALIZATION
873