/*
*******************************************************************************
*
*   Copyright (C) 2009-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31 
32 U_NAMESPACE_BEGIN
33 
// Public API dispatch via Normalizer2 subclasses -------------------------- ***
35 
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39     normalize(const UnicodeString &src,
40               UnicodeString &dest,
41               UErrorCode &errorCode) const {
42         if(U_SUCCESS(errorCode)) {
43             if(&dest!=&src) {
44                 dest=src;
45             } else {
46                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47             }
48         }
49         return dest;
50     }
51     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52     normalizeSecondAndAppend(UnicodeString &first,
53                              const UnicodeString &second,
54                              UErrorCode &errorCode) const {
55         if(U_SUCCESS(errorCode)) {
56             if(&first!=&second) {
57                 first.append(second);
58             } else {
59                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60             }
61         }
62         return first;
63     }
64     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65     append(UnicodeString &first,
66            const UnicodeString &second,
67            UErrorCode &errorCode) const {
68         if(U_SUCCESS(errorCode)) {
69             if(&first!=&second) {
70                 first.append(second);
71             } else {
72                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73             }
74         }
75         return first;
76     }
77     virtual UBool
getDecomposition(UChar32,UnicodeString &) const78     getDecomposition(UChar32, UnicodeString &) const {
79         return FALSE;
80     }
81     virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const82     isNormalized(const UnicodeString &, UErrorCode &) const {
83         return TRUE;
84     }
85     virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const86     quickCheck(const UnicodeString &, UErrorCode &) const {
87         return UNORM_YES;
88     }
89     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const90     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
91         return s.length();
92     }
hasBoundaryBefore(UChar32) const93     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const94     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const95     virtual UBool isInert(UChar32) const { return TRUE; }
96 };
97 
98 // Intermediate class:
99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
100 class Normalizer2WithImpl : public Normalizer2 {
101 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)102     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
103 
104     // normalize
105     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const106     normalize(const UnicodeString &src,
107               UnicodeString &dest,
108               UErrorCode &errorCode) const {
109         if(U_FAILURE(errorCode)) {
110             dest.setToBogus();
111             return dest;
112         }
113         const UChar *sArray=src.getBuffer();
114         if(&dest==&src || sArray==NULL) {
115             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
116             dest.setToBogus();
117             return dest;
118         }
119         dest.remove();
120         ReorderingBuffer buffer(impl, dest);
121         if(buffer.init(src.length(), errorCode)) {
122             normalize(sArray, sArray+src.length(), buffer, errorCode);
123         }
124         return dest;
125     }
126     virtual void
127     normalize(const UChar *src, const UChar *limit,
128               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
129 
130     // normalize and append
131     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const132     normalizeSecondAndAppend(UnicodeString &first,
133                              const UnicodeString &second,
134                              UErrorCode &errorCode) const {
135         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
136     }
137     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const138     append(UnicodeString &first,
139            const UnicodeString &second,
140            UErrorCode &errorCode) const {
141         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
142     }
143     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const144     normalizeSecondAndAppend(UnicodeString &first,
145                              const UnicodeString &second,
146                              UBool doNormalize,
147                              UErrorCode &errorCode) const {
148         uprv_checkCanGetBuffer(first, errorCode);
149         if(U_FAILURE(errorCode)) {
150             return first;
151         }
152         const UChar *secondArray=second.getBuffer();
153         if(&first==&second || secondArray==NULL) {
154             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
155             return first;
156         }
157         ReorderingBuffer buffer(impl, first);
158         if(buffer.init(first.length()+second.length(), errorCode)) {
159             normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
160                                buffer, errorCode);
161         }
162         return first;
163     }
164     virtual void
165     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
166                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
167     virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition) const168     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
169         UChar buffer[4];
170         int32_t length;
171         const UChar *d=impl.getDecomposition(c, buffer, length);
172         if(d==NULL) {
173             return FALSE;
174         }
175         if(d==buffer) {
176             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
177         } else {
178             decomposition.setTo(FALSE, d, length);  // read-only alias
179         }
180         return TRUE;
181     }
182 
183     // quick checks
184     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const185     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
186         if(U_FAILURE(errorCode)) {
187             return FALSE;
188         }
189         const UChar *sArray=s.getBuffer();
190         if(sArray==NULL) {
191             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
192             return FALSE;
193         }
194         const UChar *sLimit=sArray+s.length();
195         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
196     }
197     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const198     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
199         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
200     }
201     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const202     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
203         if(U_FAILURE(errorCode)) {
204             return 0;
205         }
206         const UChar *sArray=s.getBuffer();
207         if(sArray==NULL) {
208             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
209             return 0;
210         }
211         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
212     }
213     virtual const UChar *
214     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
215 
getQuickCheck(UChar32) const216     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
217         return UNORM_YES;
218     }
219 
220     const Normalizer2Impl &impl;
221 };
222 
223 class DecomposeNormalizer2 : public Normalizer2WithImpl {
224 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)225     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
226 
227 private:
228     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const229     normalize(const UChar *src, const UChar *limit,
230               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
231         impl.decompose(src, limit, &buffer, errorCode);
232     }
233     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
234     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const235     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
236                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
237         impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
238     }
239     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const240     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
241         return impl.decompose(src, limit, NULL, errorCode);
242     }
243     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const244     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
245         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
246     }
hasBoundaryBefore(UChar32 c) const247     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const248     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const249     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
250 };
251 
252 class ComposeNormalizer2 : public Normalizer2WithImpl {
253 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)254     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
255         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
256 
257 private:
258     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const259     normalize(const UChar *src, const UChar *limit,
260               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
261         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
262     }
263     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
264     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const265     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
266                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
267         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
268     }
269 
270     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const271     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
272         if(U_FAILURE(errorCode)) {
273             return FALSE;
274         }
275         const UChar *sArray=s.getBuffer();
276         if(sArray==NULL) {
277             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
278             return FALSE;
279         }
280         UnicodeString temp;
281         ReorderingBuffer buffer(impl, temp);
282         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
283             return FALSE;
284         }
285         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
286     }
287     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const288     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
289         if(U_FAILURE(errorCode)) {
290             return UNORM_MAYBE;
291         }
292         const UChar *sArray=s.getBuffer();
293         if(sArray==NULL) {
294             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
295             return UNORM_MAYBE;
296         }
297         UNormalizationCheckResult qcResult=UNORM_YES;
298         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
299         return qcResult;
300     }
301     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const302     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
303         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
304     }
305     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const306     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
307         return impl.getCompQuickCheck(impl.getNorm16(c));
308     }
hasBoundaryBefore(UChar32 c) const309     virtual UBool hasBoundaryBefore(UChar32 c) const {
310         return impl.hasCompBoundaryBefore(c);
311     }
hasBoundaryAfter(UChar32 c) const312     virtual UBool hasBoundaryAfter(UChar32 c) const {
313         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
314     }
isInert(UChar32 c) const315     virtual UBool isInert(UChar32 c) const {
316         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
317     }
318 
319     const UBool onlyContiguous;
320 };
321 
322 class FCDNormalizer2 : public Normalizer2WithImpl {
323 public:
FCDNormalizer2(const Normalizer2Impl & ni)324     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
325 
326 private:
327     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const328     normalize(const UChar *src, const UChar *limit,
329               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
330         impl.makeFCD(src, limit, &buffer, errorCode);
331     }
332     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
333     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const334     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
335                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
336         impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
337     }
338     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const339     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
340         return impl.makeFCD(src, limit, NULL, errorCode);
341     }
342     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const343     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const344     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const345     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
346 };
347 
348 // instance cache ---------------------------------------------------------- ***
349 
350 struct Norm2AllModes : public UMemory {
351     static Norm2AllModes *createInstance(const char *packageName,
352                                          const char *name,
353                                          UErrorCode &errorCode);
Norm2AllModesNorm2AllModes354     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
355 
356     Normalizer2Impl impl;
357     ComposeNormalizer2 comp;
358     DecomposeNormalizer2 decomp;
359     FCDNormalizer2 fcd;
360     ComposeNormalizer2 fcc;
361 };
362 
363 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)364 Norm2AllModes::createInstance(const char *packageName,
365                               const char *name,
366                               UErrorCode &errorCode) {
367     if(U_FAILURE(errorCode)) {
368         return NULL;
369     }
370     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
371     if(allModes.isNull()) {
372         errorCode=U_MEMORY_ALLOCATION_ERROR;
373         return NULL;
374     }
375     allModes->impl.load(packageName, name, errorCode);
376     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
377 }
378 
379 U_CDECL_BEGIN
380 static UBool U_CALLCONV uprv_normalizer2_cleanup();
381 U_CDECL_END
382 
383 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
384 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)385     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
386         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)387     Norm2AllModes *getInstance(UErrorCode &errorCode) {
388         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
389     }
390 private:
createInstance(const void * context,UErrorCode & errorCode)391     static void *createInstance(const void *context, UErrorCode &errorCode) {
392         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
393         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
394     }
395 
396     const char *name;
397 };
398 
399 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
400 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
401 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
402 
403 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
404 public:
Norm2Singleton(SimpleSingleton & s)405     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)406     Normalizer2 *getInstance(UErrorCode &errorCode) {
407         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
408     }
409 private:
createInstance(const void *,UErrorCode & errorCode)410     static void *createInstance(const void *, UErrorCode &errorCode) {
411         Normalizer2 *noop=new NoopNormalizer2;
412         if(noop==NULL) {
413             errorCode=U_MEMORY_ALLOCATION_ERROR;
414         }
415         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
416         return noop;
417     }
418 };
419 
420 STATIC_SIMPLE_SINGLETON(noopSingleton);
421 
422 static UHashtable *cache=NULL;
423 
424 U_CDECL_BEGIN
425 
deleteNorm2AllModes(void * allModes)426 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
427     delete (Norm2AllModes *)allModes;
428 }
429 
uprv_normalizer2_cleanup()430 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
431     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
432     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
433     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
434     Norm2Singleton(noopSingleton).deleteInstance();
435     uhash_close(cache);
436     cache=NULL;
437     return TRUE;
438 }
439 
440 U_CDECL_END
441 
getNFCInstance(UErrorCode & errorCode)442 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
443     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
444     return allModes!=NULL ? &allModes->comp : NULL;
445 }
446 
getNFDInstance(UErrorCode & errorCode)447 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
448     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
449     return allModes!=NULL ? &allModes->decomp : NULL;
450 }
451 
getFCDInstance(UErrorCode & errorCode)452 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
453     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
454     if(allModes!=NULL) {
455         allModes->impl.getFCDTrie(errorCode);
456         return &allModes->fcd;
457     } else {
458         return NULL;
459     }
460 }
461 
getFCCInstance(UErrorCode & errorCode)462 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
463     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
464     return allModes!=NULL ? &allModes->fcc : NULL;
465 }
466 
getNFKCInstance(UErrorCode & errorCode)467 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
468     Norm2AllModes *allModes=
469         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
470     return allModes!=NULL ? &allModes->comp : NULL;
471 }
472 
getNFKDInstance(UErrorCode & errorCode)473 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
474     Norm2AllModes *allModes=
475         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
476     return allModes!=NULL ? &allModes->decomp : NULL;
477 }
478 
getNFKC_CFInstance(UErrorCode & errorCode)479 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
480     Norm2AllModes *allModes=
481         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
482     return allModes!=NULL ? &allModes->comp : NULL;
483 }
484 
getNoopInstance(UErrorCode & errorCode)485 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
486     return Norm2Singleton(noopSingleton).getInstance(errorCode);
487 }
488 
489 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)490 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
491     if(U_FAILURE(errorCode)) {
492         return NULL;
493     }
494     switch(mode) {
495     case UNORM_NFD:
496         return getNFDInstance(errorCode);
497     case UNORM_NFKD:
498         return getNFKDInstance(errorCode);
499     case UNORM_NFC:
500         return getNFCInstance(errorCode);
501     case UNORM_NFKC:
502         return getNFKCInstance(errorCode);
503     case UNORM_FCD:
504         return getFCDInstance(errorCode);
505     default:  // UNORM_NONE
506         return getNoopInstance(errorCode);
507     }
508 }
509 
510 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)511 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
512     Norm2AllModes *allModes=
513         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
514     return allModes!=NULL ? &allModes->impl : NULL;
515 }
516 
517 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)518 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
519     Norm2AllModes *allModes=
520         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
521     return allModes!=NULL ? &allModes->impl : NULL;
522 }
523 
524 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)525 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
526     Norm2AllModes *allModes=
527         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
528     return allModes!=NULL ? &allModes->impl : NULL;
529 }
530 
531 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)532 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
533     return &((Normalizer2WithImpl *)norm2)->impl;
534 }
535 
536 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)537 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
538     Norm2AllModes *allModes=
539         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
540     if(allModes!=NULL) {
541         return allModes->impl.getFCDTrie(errorCode);
542     } else {
543         return NULL;
544     }
545 }
546 
547 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)548 Normalizer2::getInstance(const char *packageName,
549                          const char *name,
550                          UNormalization2Mode mode,
551                          UErrorCode &errorCode) {
552     if(U_FAILURE(errorCode)) {
553         return NULL;
554     }
555     if(name==NULL || *name==0) {
556         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
557     }
558     Norm2AllModes *allModes=NULL;
559     if(packageName==NULL) {
560         if(0==uprv_strcmp(name, "nfc")) {
561             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
562         } else if(0==uprv_strcmp(name, "nfkc")) {
563             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
564         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
565             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
566         }
567     }
568     if(allModes==NULL && U_SUCCESS(errorCode)) {
569         {
570             Mutex lock;
571             if(cache!=NULL) {
572                 allModes=(Norm2AllModes *)uhash_get(cache, name);
573             }
574         }
575         if(allModes==NULL) {
576             LocalPointer<Norm2AllModes> localAllModes(
577                 Norm2AllModes::createInstance(packageName, name, errorCode));
578             if(U_SUCCESS(errorCode)) {
579                 Mutex lock;
580                 if(cache==NULL) {
581                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
582                     if(U_FAILURE(errorCode)) {
583                         return NULL;
584                     }
585                     uhash_setKeyDeleter(cache, uprv_free);
586                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
587                 }
588                 void *temp=uhash_get(cache, name);
589                 if(temp==NULL) {
590                     int32_t keyLength=uprv_strlen(name)+1;
591                     char *nameCopy=(char *)uprv_malloc(keyLength);
592                     if(nameCopy==NULL) {
593                         errorCode=U_MEMORY_ALLOCATION_ERROR;
594                         return NULL;
595                     }
596                     uprv_memcpy(nameCopy, name, keyLength);
597                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
598                 } else {
599                     // race condition
600                     allModes=(Norm2AllModes *)temp;
601                 }
602             }
603         }
604     }
605     if(allModes!=NULL && U_SUCCESS(errorCode)) {
606         switch(mode) {
607         case UNORM2_COMPOSE:
608             return &allModes->comp;
609         case UNORM2_DECOMPOSE:
610             return &allModes->decomp;
611         case UNORM2_FCD:
612             allModes->impl.getFCDTrie(errorCode);
613             return &allModes->fcd;
614         case UNORM2_COMPOSE_CONTIGUOUS:
615             return &allModes->fcc;
616         default:
617             break;  // do nothing
618         }
619     }
620     return NULL;
621 }
622 
// Macro-generated UObject boilerplate for the abstract Normalizer2 class.
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
624 
625 U_NAMESPACE_END
626 
// C API ------------------------------------------------------------------- ***
628 
629 U_NAMESPACE_USE
630 
631 U_DRAFT const UNormalizer2 * U_EXPORT2
632 unorm2_getInstance(const char *packageName,
633                    const char *name,
634                    UNormalization2Mode mode,
635                    UErrorCode *pErrorCode) {
636     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
637 }
638 
639 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)640 unorm2_close(UNormalizer2 *norm2) {
641     delete (Normalizer2 *)norm2;
642 }
643 
644 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)645 unorm2_normalize(const UNormalizer2 *norm2,
646                  const UChar *src, int32_t length,
647                  UChar *dest, int32_t capacity,
648                  UErrorCode *pErrorCode) {
649     if(U_FAILURE(*pErrorCode)) {
650         return 0;
651     }
652     if( (src==NULL ? length!=0 : length<-1) ||
653         (dest==NULL ? capacity!=0 : capacity<0) ||
654         (src==dest && src!=NULL)
655     ) {
656         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
657         return 0;
658     }
659     UnicodeString destString(dest, 0, capacity);
660     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
661     if(length!=0) {
662         const Normalizer2 *n2=(const Normalizer2 *)norm2;
663         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
664         if(n2wi!=NULL) {
665             // Avoid duplicate argument checking and support NUL-terminated src.
666             ReorderingBuffer buffer(n2wi->impl, destString);
667             if(buffer.init(length, *pErrorCode)) {
668                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
669             }
670         } else {
671             UnicodeString srcString(length<0, src, length);
672             n2->normalize(srcString, destString, *pErrorCode);
673         }
674     }
675     return destString.extract(dest, capacity, *pErrorCode);
676 }
677 
678 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)679 normalizeSecondAndAppend(const UNormalizer2 *norm2,
680                          UChar *first, int32_t firstLength, int32_t firstCapacity,
681                          const UChar *second, int32_t secondLength,
682                          UBool doNormalize,
683                          UErrorCode *pErrorCode) {
684     if(U_FAILURE(*pErrorCode)) {
685         return 0;
686     }
687     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
688         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
689                        (firstCapacity<0 || firstLength<-1)) ||
690         (first==second && first!=NULL)
691     ) {
692         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
693         return 0;
694     }
695     UnicodeString firstString(first, firstLength, firstCapacity);
696     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
697     if(secondLength!=0) {
698         const Normalizer2 *n2=(const Normalizer2 *)norm2;
699         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
700         if(n2wi!=NULL) {
701             // Avoid duplicate argument checking and support NUL-terminated src.
702             ReorderingBuffer buffer(n2wi->impl, firstString);
703             if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
704                 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
705                                         doNormalize, buffer, *pErrorCode);
706             }
707         } else {
708             UnicodeString secondString(secondLength<0, second, secondLength);
709             if(doNormalize) {
710                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
711             } else {
712                 n2->append(firstString, secondString, *pErrorCode);
713             }
714         }
715     }
716     return firstString.extract(first, firstCapacity, *pErrorCode);
717 }
718 
719 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)720 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
721                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
722                                 const UChar *second, int32_t secondLength,
723                                 UErrorCode *pErrorCode) {
724     return normalizeSecondAndAppend(norm2,
725                                     first, firstLength, firstCapacity,
726                                     second, secondLength,
727                                     TRUE, pErrorCode);
728 }
729 
730 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)731 unorm2_append(const UNormalizer2 *norm2,
732               UChar *first, int32_t firstLength, int32_t firstCapacity,
733               const UChar *second, int32_t secondLength,
734               UErrorCode *pErrorCode) {
735     return normalizeSecondAndAppend(norm2,
736                                     first, firstLength, firstCapacity,
737                                     second, secondLength,
738                                     FALSE, pErrorCode);
739 }
740 
741 U_DRAFT int32_t U_EXPORT2
unorm2_getDecomposition(const UNormalizer2 * norm2,UChar32 c,UChar * decomposition,int32_t capacity,UErrorCode * pErrorCode)742 unorm2_getDecomposition(const UNormalizer2 *norm2,
743                         UChar32 c, UChar *decomposition, int32_t capacity,
744                         UErrorCode *pErrorCode) {
745     if(U_FAILURE(*pErrorCode)) {
746         return 0;
747     }
748     if(decomposition==NULL ? capacity!=0 : capacity<0) {
749         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
750         return 0;
751     }
752     UnicodeString destString(decomposition, 0, capacity);
753     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
754         return destString.extract(decomposition, capacity, *pErrorCode);
755     } else {
756         return -1;
757     }
758 }
759 
760 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)761 unorm2_isNormalized(const UNormalizer2 *norm2,
762                     const UChar *s, int32_t length,
763                     UErrorCode *pErrorCode) {
764     if(U_FAILURE(*pErrorCode)) {
765         return 0;
766     }
767     if((s==NULL && length!=0) || length<-1) {
768         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
769         return 0;
770     }
771     UnicodeString sString(length<0, s, length);
772     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
773 }
774 
775 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)776 unorm2_quickCheck(const UNormalizer2 *norm2,
777                   const UChar *s, int32_t length,
778                   UErrorCode *pErrorCode) {
779     if(U_FAILURE(*pErrorCode)) {
780         return UNORM_NO;
781     }
782     if((s==NULL && length!=0) || length<-1) {
783         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
784         return UNORM_NO;
785     }
786     UnicodeString sString(length<0, s, length);
787     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
788 }
789 
790 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)791 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
792                          const UChar *s, int32_t length,
793                          UErrorCode *pErrorCode) {
794     if(U_FAILURE(*pErrorCode)) {
795         return 0;
796     }
797     if((s==NULL && length!=0) || length<-1) {
798         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
799         return 0;
800     }
801     UnicodeString sString(length<0, s, length);
802     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
803 }
804 
805 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)806 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
807     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
808 }
809 
810 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)811 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
812     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
813 }
814 
815 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)816 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
817     return ((const Normalizer2 *)norm2)->isInert(c);
818 }
819 
820 // Some properties APIs ---------------------------------------------------- ***
821 
822 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)823 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
824     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
825         return UNORM_YES;
826     }
827     UErrorCode errorCode=U_ZERO_ERROR;
828     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
829     if(U_SUCCESS(errorCode)) {
830         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
831     } else {
832         return UNORM_MAYBE;
833     }
834 }
835 
836 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)837 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
838     const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
839     if(U_SUCCESS(*pErrorCode)) {
840         fcdHighStart=trie->highStart;
841         return trie->index;
842     } else {
843         return NULL;
844     }
845 }
846 
847 #endif  // !UCONFIG_NO_NORMALIZATION
848