• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2009-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  normalizer2.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2009nov22
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35 
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39     normalize(const UnicodeString &src,
40               UnicodeString &dest,
41               UErrorCode &errorCode) const {
42         if(U_SUCCESS(errorCode)) {
43             if(&dest!=&src) {
44                 dest=src;
45             } else {
46                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47             }
48         }
49         return dest;
50     }
51     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52     normalizeSecondAndAppend(UnicodeString &first,
53                              const UnicodeString &second,
54                              UErrorCode &errorCode) const {
55         if(U_SUCCESS(errorCode)) {
56             if(&first!=&second) {
57                 first.append(second);
58             } else {
59                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60             }
61         }
62         return first;
63     }
64     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65     append(UnicodeString &first,
66            const UnicodeString &second,
67            UErrorCode &errorCode) const {
68         if(U_SUCCESS(errorCode)) {
69             if(&first!=&second) {
70                 first.append(second);
71             } else {
72                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73             }
74         }
75         return first;
76     }
77     virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const78     isNormalized(const UnicodeString &, UErrorCode &) const {
79         return TRUE;
80     }
81     virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const82     quickCheck(const UnicodeString &, UErrorCode &) const {
83         return UNORM_YES;
84     }
85     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const86     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
87         return s.length();
88     }
hasBoundaryBefore(UChar32) const89     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const90     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const91     virtual UBool isInert(UChar32) const { return TRUE; }
92 
93     static UClassID U_EXPORT2 getStaticClassID();
94     virtual UClassID getDynamicClassID() const;
95 };
96 
97 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoopNormalizer2)
98 
99 // Intermediate class:
100 // Has Normalizer2Impl and does boilerplate argument checking and setup.
101 class Normalizer2WithImpl : public Normalizer2 {
102 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)103     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
104 
105     // normalize
106     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const107     normalize(const UnicodeString &src,
108               UnicodeString &dest,
109               UErrorCode &errorCode) const {
110         if(U_FAILURE(errorCode)) {
111             dest.setToBogus();
112             return dest;
113         }
114         const UChar *sArray=src.getBuffer();
115         if(&dest==&src || sArray==NULL) {
116             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
117             dest.setToBogus();
118             return dest;
119         }
120         dest.remove();
121         ReorderingBuffer buffer(impl, dest);
122         if(buffer.init(src.length(), errorCode)) {
123             normalize(sArray, sArray+src.length(), buffer, errorCode);
124         }
125         return dest;
126     }
127     virtual void
128     normalize(const UChar *src, const UChar *limit,
129               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
130 
131     // normalize and append
132     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const133     normalizeSecondAndAppend(UnicodeString &first,
134                              const UnicodeString &second,
135                              UErrorCode &errorCode) const {
136         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
137     }
138     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const139     append(UnicodeString &first,
140            const UnicodeString &second,
141            UErrorCode &errorCode) const {
142         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
143     }
144     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const145     normalizeSecondAndAppend(UnicodeString &first,
146                              const UnicodeString &second,
147                              UBool doNormalize,
148                              UErrorCode &errorCode) const {
149         uprv_checkCanGetBuffer(first, errorCode);
150         if(U_FAILURE(errorCode)) {
151             return first;
152         }
153         const UChar *secondArray=second.getBuffer();
154         if(&first==&second || secondArray==NULL) {
155             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156             return first;
157         }
158         ReorderingBuffer buffer(impl, first);
159         if(buffer.init(first.length()+second.length(), errorCode)) {
160             normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
161                                buffer, errorCode);
162         }
163         return first;
164     }
165     virtual void
166     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
167                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
168 
169     // quick checks
170     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const171     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
172         if(U_FAILURE(errorCode)) {
173             return FALSE;
174         }
175         const UChar *sArray=s.getBuffer();
176         if(sArray==NULL) {
177             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
178             return FALSE;
179         }
180         const UChar *sLimit=sArray+s.length();
181         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
182     }
183     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const184     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
185         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
186     }
187     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const188     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
189         if(U_FAILURE(errorCode)) {
190             return 0;
191         }
192         const UChar *sArray=s.getBuffer();
193         if(sArray==NULL) {
194             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
195             return 0;
196         }
197         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
198     }
199     virtual const UChar *
200     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
201 
getQuickCheck(UChar32) const202     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
203         return UNORM_YES;
204     }
205 
206     static UClassID U_EXPORT2 getStaticClassID();
207     virtual UClassID getDynamicClassID() const;
208 
209     const Normalizer2Impl &impl;
210 };
211 
212 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer2WithImpl)
213 
214 class DecomposeNormalizer2 : public Normalizer2WithImpl {
215 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)216     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
217 
218 private:
219     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const220     normalize(const UChar *src, const UChar *limit,
221               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
222         impl.decompose(src, limit, &buffer, errorCode);
223     }
224     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
225     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const226     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
227                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228         impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
229     }
230     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const231     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
232         return impl.decompose(src, limit, NULL, errorCode);
233     }
234     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const235     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
236         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
237     }
hasBoundaryBefore(UChar32 c) const238     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const239     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const240     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
241 };
242 
243 class ComposeNormalizer2 : public Normalizer2WithImpl {
244 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)245     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
246         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
247 
248 private:
249     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const250     normalize(const UChar *src, const UChar *limit,
251               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
252         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
253     }
254     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
255     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const256     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
257                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
258         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
259     }
260 
261     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const262     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
263         if(U_FAILURE(errorCode)) {
264             return FALSE;
265         }
266         const UChar *sArray=s.getBuffer();
267         if(sArray==NULL) {
268             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
269             return FALSE;
270         }
271         UnicodeString temp;
272         ReorderingBuffer buffer(impl, temp);
273         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
274             return FALSE;
275         }
276         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
277     }
278     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const279     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
280         if(U_FAILURE(errorCode)) {
281             return UNORM_MAYBE;
282         }
283         const UChar *sArray=s.getBuffer();
284         if(sArray==NULL) {
285             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
286             return UNORM_MAYBE;
287         }
288         UNormalizationCheckResult qcResult=UNORM_YES;
289         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
290         return qcResult;
291     }
292     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const293     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
294         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
295     }
296     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const297     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
298         return impl.getCompQuickCheck(impl.getNorm16(c));
299     }
hasBoundaryBefore(UChar32 c) const300     virtual UBool hasBoundaryBefore(UChar32 c) const {
301         return impl.hasCompBoundaryBefore(c);
302     }
hasBoundaryAfter(UChar32 c) const303     virtual UBool hasBoundaryAfter(UChar32 c) const {
304         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
305     }
isInert(UChar32 c) const306     virtual UBool isInert(UChar32 c) const {
307         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
308     }
309 
310     const UBool onlyContiguous;
311 };
312 
313 class FCDNormalizer2 : public Normalizer2WithImpl {
314 public:
FCDNormalizer2(const Normalizer2Impl & ni)315     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
316 
317 private:
318     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const319     normalize(const UChar *src, const UChar *limit,
320               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
321         impl.makeFCD(src, limit, &buffer, errorCode);
322     }
323     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
324     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const325     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
326                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
327         impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
328     }
329     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const330     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
331         return impl.makeFCD(src, limit, NULL, errorCode);
332     }
333     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const334     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const335     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const336     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
337 };
338 
339 // instance cache ---------------------------------------------------------- ***
340 
341 struct Norm2AllModes : public UMemory {
342     static Norm2AllModes *createInstance(const char *packageName,
343                                          const char *name,
344                                          UErrorCode &errorCode);
Norm2AllModesNorm2AllModes345     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
346 
347     Normalizer2Impl impl;
348     ComposeNormalizer2 comp;
349     DecomposeNormalizer2 decomp;
350     FCDNormalizer2 fcd;
351     ComposeNormalizer2 fcc;
352 };
353 
354 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)355 Norm2AllModes::createInstance(const char *packageName,
356                               const char *name,
357                               UErrorCode &errorCode) {
358     if(U_FAILURE(errorCode)) {
359         return NULL;
360     }
361     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
362     if(allModes.isNull()) {
363         errorCode=U_MEMORY_ALLOCATION_ERROR;
364         return NULL;
365     }
366     allModes->impl.load(packageName, name, errorCode);
367     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
368 }
369 
370 U_CDECL_BEGIN
371 static UBool U_CALLCONV uprv_normalizer2_cleanup();
372 U_CDECL_END
373 
374 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
375 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)376     Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
377         TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)378     Norm2AllModes *getInstance(UErrorCode &errorCode) {
379         return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
380     }
381 private:
createInstance(const void * context,UErrorCode & errorCode)382     static void *createInstance(const void *context, UErrorCode &errorCode) {
383         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
384         return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
385     }
386 
387     const char *name;
388 };
389 
390 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
391 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
392 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
393 
394 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
395 public:
Norm2Singleton(SimpleSingleton & s)396     Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)397     Normalizer2 *getInstance(UErrorCode &errorCode) {
398         return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
399     }
400 private:
createInstance(const void *,UErrorCode & errorCode)401     static void *createInstance(const void *, UErrorCode &errorCode) {
402         Normalizer2 *noop=new NoopNormalizer2;
403         if(noop==NULL) {
404             errorCode=U_MEMORY_ALLOCATION_ERROR;
405         }
406         ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
407         return noop;
408     }
409 };
410 
411 STATIC_SIMPLE_SINGLETON(noopSingleton);
412 
413 static UHashtable *cache=NULL;
414 
415 U_CDECL_BEGIN
416 
deleteNorm2AllModes(void * allModes)417 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
418     delete (Norm2AllModes *)allModes;
419 }
420 
uprv_normalizer2_cleanup()421 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
422     Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
423     Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
424     Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
425     Norm2Singleton(noopSingleton).deleteInstance();
426     uhash_close(cache);
427     cache=NULL;
428     return TRUE;
429 }
430 
431 U_CDECL_END
432 
getNFCInstance(UErrorCode & errorCode)433 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
434     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
435     return allModes!=NULL ? &allModes->comp : NULL;
436 }
437 
getNFDInstance(UErrorCode & errorCode)438 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
439     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
440     return allModes!=NULL ? &allModes->decomp : NULL;
441 }
442 
getFCDInstance(UErrorCode & errorCode)443 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
444     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
445     if(allModes!=NULL) {
446         allModes->impl.getFCDTrie(errorCode);
447         return &allModes->fcd;
448     } else {
449         return NULL;
450     }
451 }
452 
getFCCInstance(UErrorCode & errorCode)453 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
454     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
455     return allModes!=NULL ? &allModes->fcc : NULL;
456 }
457 
getNFKCInstance(UErrorCode & errorCode)458 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
459     Norm2AllModes *allModes=
460         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
461     return allModes!=NULL ? &allModes->comp : NULL;
462 }
463 
getNFKDInstance(UErrorCode & errorCode)464 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
465     Norm2AllModes *allModes=
466         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
467     return allModes!=NULL ? &allModes->decomp : NULL;
468 }
469 
getNFKC_CFInstance(UErrorCode & errorCode)470 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
471     Norm2AllModes *allModes=
472         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
473     return allModes!=NULL ? &allModes->comp : NULL;
474 }
475 
getNoopInstance(UErrorCode & errorCode)476 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
477     return Norm2Singleton(noopSingleton).getInstance(errorCode);
478 }
479 
480 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)481 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
482     if(U_FAILURE(errorCode)) {
483         return NULL;
484     }
485     switch(mode) {
486     case UNORM_NFD:
487         return getNFDInstance(errorCode);
488     case UNORM_NFKD:
489         return getNFKDInstance(errorCode);
490     case UNORM_NFC:
491         return getNFCInstance(errorCode);
492     case UNORM_NFKC:
493         return getNFKCInstance(errorCode);
494     case UNORM_FCD:
495         return getFCDInstance(errorCode);
496     default:  // UNORM_NONE
497         return getNoopInstance(errorCode);
498     }
499 }
500 
501 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)502 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
503     Norm2AllModes *allModes=
504         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
505     return allModes!=NULL ? &allModes->impl : NULL;
506 }
507 
508 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)509 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
510     Norm2AllModes *allModes=
511         Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
512     return allModes!=NULL ? &allModes->impl : NULL;
513 }
514 
515 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)516 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
517     Norm2AllModes *allModes=
518         Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
519     return allModes!=NULL ? &allModes->impl : NULL;
520 }
521 
522 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)523 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
524     return &((Normalizer2WithImpl *)norm2)->impl;
525 }
526 
527 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)528 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
529     Norm2AllModes *allModes=
530         Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
531     if(allModes!=NULL) {
532         return allModes->impl.getFCDTrie(errorCode);
533     } else {
534         return NULL;
535     }
536 }
537 
538 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)539 Normalizer2::getInstance(const char *packageName,
540                          const char *name,
541                          UNormalization2Mode mode,
542                          UErrorCode &errorCode) {
543     if(U_FAILURE(errorCode)) {
544         return NULL;
545     }
546     if(name==NULL || *name==0) {
547         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
548     }
549     Norm2AllModes *allModes=NULL;
550     if(packageName==NULL) {
551         if(0==uprv_strcmp(name, "nfc")) {
552             allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
553         } else if(0==uprv_strcmp(name, "nfkc")) {
554             allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
555         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
556             allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
557         }
558     }
559     if(allModes==NULL && U_SUCCESS(errorCode)) {
560         {
561             Mutex lock;
562             if(cache!=NULL) {
563                 allModes=(Norm2AllModes *)uhash_get(cache, name);
564             }
565         }
566         if(allModes==NULL) {
567             LocalPointer<Norm2AllModes> localAllModes(
568                 Norm2AllModes::createInstance(packageName, name, errorCode));
569             if(U_SUCCESS(errorCode)) {
570                 Mutex lock;
571                 if(cache==NULL) {
572                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
573                     if(U_FAILURE(errorCode)) {
574                         return NULL;
575                     }
576                     uhash_setKeyDeleter(cache, uprv_free);
577                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
578                 }
579                 void *temp=uhash_get(cache, name);
580                 if(temp==NULL) {
581                     int32_t keyLength=uprv_strlen(name)+1;
582                     char *nameCopy=(char *)uprv_malloc(keyLength);
583                     if(nameCopy==NULL) {
584                         errorCode=U_MEMORY_ALLOCATION_ERROR;
585                         return NULL;
586                     }
587                     uprv_memcpy(nameCopy, name, keyLength);
588                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
589                 } else {
590                     // race condition
591                     allModes=(Norm2AllModes *)temp;
592                 }
593             }
594         }
595     }
596     if(allModes!=NULL && U_SUCCESS(errorCode)) {
597         switch(mode) {
598         case UNORM2_COMPOSE:
599             return &allModes->comp;
600         case UNORM2_DECOMPOSE:
601             return &allModes->decomp;
602         case UNORM2_FCD:
603             allModes->impl.getFCDTrie(errorCode);
604             return &allModes->fcd;
605         case UNORM2_COMPOSE_CONTIGUOUS:
606             return &allModes->fcc;
607         default:
608             break;  // do nothing
609         }
610     }
611     return NULL;
612 }
613 
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2)614 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2)
615 
616 U_NAMESPACE_END
617 
618 // C API ------------------------------------------------------------------- ***
619 
620 U_NAMESPACE_USE
621 
622 U_DRAFT const UNormalizer2 * U_EXPORT2
623 unorm2_getInstance(const char *packageName,
624                    const char *name,
625                    UNormalization2Mode mode,
626                    UErrorCode *pErrorCode) {
627     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
628 }
629 
630 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)631 unorm2_close(UNormalizer2 *norm2) {
632     delete (Normalizer2 *)norm2;
633 }
634 
635 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)636 unorm2_normalize(const UNormalizer2 *norm2,
637                  const UChar *src, int32_t length,
638                  UChar *dest, int32_t capacity,
639                  UErrorCode *pErrorCode) {
640     if(U_FAILURE(*pErrorCode)) {
641         return 0;
642     }
643     if(src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) {
644         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
645         return 0;
646     }
647     UnicodeString destString(dest, 0, capacity);
648     const Normalizer2 *n2=(const Normalizer2 *)norm2;
649     if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
650         // Avoid duplicate argument checking and support NUL-terminated src.
651         const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
652         ReorderingBuffer buffer(n2wi->impl, destString);
653         if(buffer.init(length, *pErrorCode)) {
654             n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
655         }
656     } else {
657         UnicodeString srcString(length<0, src, length);
658         n2->normalize(srcString, destString, *pErrorCode);
659     }
660     return destString.extract(dest, capacity, *pErrorCode);
661 }
662 
663 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)664 normalizeSecondAndAppend(const UNormalizer2 *norm2,
665                          UChar *first, int32_t firstLength, int32_t firstCapacity,
666                          const UChar *second, int32_t secondLength,
667                          UBool doNormalize,
668                          UErrorCode *pErrorCode) {
669     if(U_FAILURE(*pErrorCode)) {
670         return 0;
671     }
672     if( second==NULL || secondLength<-1 ||
673         firstCapacity<0 || (first==NULL && firstCapacity>0) || firstLength<-1 ||
674         first==second
675     ) {
676         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
677         return 0;
678     }
679     UnicodeString firstString(first, firstLength, firstCapacity);
680     const Normalizer2 *n2=(const Normalizer2 *)norm2;
681     if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
682         // Avoid duplicate argument checking and support NUL-terminated src.
683         const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
684         ReorderingBuffer buffer(n2wi->impl, firstString);
685         if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
686             n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
687                                      doNormalize, buffer, *pErrorCode);
688         }
689     } else {
690         UnicodeString secondString(secondLength<0, second, secondLength);
691         if(doNormalize) {
692             n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
693         } else {
694             n2->append(firstString, secondString, *pErrorCode);
695         }
696     }
697     return firstString.extract(first, firstCapacity, *pErrorCode);
698 }
699 
700 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)701 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
702                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
703                                 const UChar *second, int32_t secondLength,
704                                 UErrorCode *pErrorCode) {
705     return normalizeSecondAndAppend(norm2,
706                                     first, firstLength, firstCapacity,
707                                     second, secondLength,
708                                     TRUE, pErrorCode);
709 }
710 
711 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)712 unorm2_append(const UNormalizer2 *norm2,
713               UChar *first, int32_t firstLength, int32_t firstCapacity,
714               const UChar *second, int32_t secondLength,
715               UErrorCode *pErrorCode) {
716     return normalizeSecondAndAppend(norm2,
717                                     first, firstLength, firstCapacity,
718                                     second, secondLength,
719                                     FALSE, pErrorCode);
720 }
721 
722 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)723 unorm2_isNormalized(const UNormalizer2 *norm2,
724                     const UChar *s, int32_t length,
725                     UErrorCode *pErrorCode) {
726     if(U_FAILURE(*pErrorCode)) {
727         return 0;
728     }
729     if(s==NULL || length<-1) {
730         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
731         return 0;
732     }
733     UnicodeString sString(length<0, s, length);
734     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
735 }
736 
737 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)738 unorm2_quickCheck(const UNormalizer2 *norm2,
739                   const UChar *s, int32_t length,
740                   UErrorCode *pErrorCode) {
741     if(U_FAILURE(*pErrorCode)) {
742         return UNORM_NO;
743     }
744     if(s==NULL || length<-1) {
745         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
746         return UNORM_NO;
747     }
748     UnicodeString sString(length<0, s, length);
749     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
750 }
751 
752 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)753 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
754                          const UChar *s, int32_t length,
755                          UErrorCode *pErrorCode) {
756     if(U_FAILURE(*pErrorCode)) {
757         return 0;
758     }
759     if(s==NULL || length<-1) {
760         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
761         return 0;
762     }
763     UnicodeString sString(length<0, s, length);
764     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
765 }
766 
767 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)768 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
769     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
770 }
771 
772 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)773 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
774     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
775 }
776 
777 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)778 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
779     return ((const Normalizer2 *)norm2)->isInert(c);
780 }
781 
782 // Some properties APIs ---------------------------------------------------- ***
783 
784 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)785 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
786     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
787         return UNORM_YES;
788     }
789     UErrorCode errorCode=U_ZERO_ERROR;
790     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
791     if(U_SUCCESS(errorCode)) {
792         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
793     } else {
794         return UNORM_MAYBE;
795     }
796 }
797 
798 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)799 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
800     const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
801     if(U_SUCCESS(*pErrorCode)) {
802         fcdHighStart=trie->highStart;
803         return trie->index;
804     } else {
805         return NULL;
806     }
807 }
808 
809 #endif  // !UCONFIG_NO_NORMALIZATION
810