1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31
32 U_NAMESPACE_BEGIN
33
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_SUCCESS(errorCode)) {
43 if(&dest!=&src) {
44 dest=src;
45 } else {
46 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47 }
48 }
49 return dest;
50 }
51 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52 normalizeSecondAndAppend(UnicodeString &first,
53 const UnicodeString &second,
54 UErrorCode &errorCode) const {
55 if(U_SUCCESS(errorCode)) {
56 if(&first!=&second) {
57 first.append(second);
58 } else {
59 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60 }
61 }
62 return first;
63 }
64 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65 append(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
68 if(U_SUCCESS(errorCode)) {
69 if(&first!=&second) {
70 first.append(second);
71 } else {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 }
74 }
75 return first;
76 }
77 virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const78 isNormalized(const UnicodeString &, UErrorCode &) const {
79 return TRUE;
80 }
81 virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const82 quickCheck(const UnicodeString &, UErrorCode &) const {
83 return UNORM_YES;
84 }
85 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const86 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
87 return s.length();
88 }
hasBoundaryBefore(UChar32) const89 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const90 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const91 virtual UBool isInert(UChar32) const { return TRUE; }
92
93 static UClassID U_EXPORT2 getStaticClassID();
94 virtual UClassID getDynamicClassID() const;
95 };
96
97 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoopNormalizer2)
98
99 // Intermediate class:
100 // Has Normalizer2Impl and does boilerplate argument checking and setup.
101 class Normalizer2WithImpl : public Normalizer2 {
102 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)103 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
104
105 // normalize
106 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const107 normalize(const UnicodeString &src,
108 UnicodeString &dest,
109 UErrorCode &errorCode) const {
110 if(U_FAILURE(errorCode)) {
111 dest.setToBogus();
112 return dest;
113 }
114 const UChar *sArray=src.getBuffer();
115 if(&dest==&src || sArray==NULL) {
116 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
117 dest.setToBogus();
118 return dest;
119 }
120 dest.remove();
121 ReorderingBuffer buffer(impl, dest);
122 if(buffer.init(src.length(), errorCode)) {
123 normalize(sArray, sArray+src.length(), buffer, errorCode);
124 }
125 return dest;
126 }
127 virtual void
128 normalize(const UChar *src, const UChar *limit,
129 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
130
131 // normalize and append
132 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const133 normalizeSecondAndAppend(UnicodeString &first,
134 const UnicodeString &second,
135 UErrorCode &errorCode) const {
136 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
137 }
138 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const139 append(UnicodeString &first,
140 const UnicodeString &second,
141 UErrorCode &errorCode) const {
142 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
143 }
144 UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const145 normalizeSecondAndAppend(UnicodeString &first,
146 const UnicodeString &second,
147 UBool doNormalize,
148 UErrorCode &errorCode) const {
149 uprv_checkCanGetBuffer(first, errorCode);
150 if(U_FAILURE(errorCode)) {
151 return first;
152 }
153 const UChar *secondArray=second.getBuffer();
154 if(&first==&second || secondArray==NULL) {
155 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156 return first;
157 }
158 ReorderingBuffer buffer(impl, first);
159 if(buffer.init(first.length()+second.length(), errorCode)) {
160 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
161 buffer, errorCode);
162 }
163 return first;
164 }
165 virtual void
166 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
167 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
168
169 // quick checks
170 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const171 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
172 if(U_FAILURE(errorCode)) {
173 return FALSE;
174 }
175 const UChar *sArray=s.getBuffer();
176 if(sArray==NULL) {
177 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
178 return FALSE;
179 }
180 const UChar *sLimit=sArray+s.length();
181 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
182 }
183 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const184 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
185 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
186 }
187 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const188 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
189 if(U_FAILURE(errorCode)) {
190 return 0;
191 }
192 const UChar *sArray=s.getBuffer();
193 if(sArray==NULL) {
194 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
195 return 0;
196 }
197 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
198 }
199 virtual const UChar *
200 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
201
getQuickCheck(UChar32) const202 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
203 return UNORM_YES;
204 }
205
206 static UClassID U_EXPORT2 getStaticClassID();
207 virtual UClassID getDynamicClassID() const;
208
209 const Normalizer2Impl &impl;
210 };
211
212 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer2WithImpl)
213
214 class DecomposeNormalizer2 : public Normalizer2WithImpl {
215 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)216 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
217
218 private:
219 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const220 normalize(const UChar *src, const UChar *limit,
221 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
222 impl.decompose(src, limit, &buffer, errorCode);
223 }
224 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
225 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const226 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
227 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
229 }
230 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const231 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
232 return impl.decompose(src, limit, NULL, errorCode);
233 }
234 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const235 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
236 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
237 }
hasBoundaryBefore(UChar32 c) const238 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const239 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const240 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
241 };
242
243 class ComposeNormalizer2 : public Normalizer2WithImpl {
244 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)245 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
246 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
247
248 private:
249 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const250 normalize(const UChar *src, const UChar *limit,
251 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
252 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
253 }
254 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
255 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const256 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
257 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
258 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
259 }
260
261 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const262 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
263 if(U_FAILURE(errorCode)) {
264 return FALSE;
265 }
266 const UChar *sArray=s.getBuffer();
267 if(sArray==NULL) {
268 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
269 return FALSE;
270 }
271 UnicodeString temp;
272 ReorderingBuffer buffer(impl, temp);
273 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
274 return FALSE;
275 }
276 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
277 }
278 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const279 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
280 if(U_FAILURE(errorCode)) {
281 return UNORM_MAYBE;
282 }
283 const UChar *sArray=s.getBuffer();
284 if(sArray==NULL) {
285 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
286 return UNORM_MAYBE;
287 }
288 UNormalizationCheckResult qcResult=UNORM_YES;
289 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
290 return qcResult;
291 }
292 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const293 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
294 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
295 }
296 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const297 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
298 return impl.getCompQuickCheck(impl.getNorm16(c));
299 }
hasBoundaryBefore(UChar32 c) const300 virtual UBool hasBoundaryBefore(UChar32 c) const {
301 return impl.hasCompBoundaryBefore(c);
302 }
hasBoundaryAfter(UChar32 c) const303 virtual UBool hasBoundaryAfter(UChar32 c) const {
304 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
305 }
isInert(UChar32 c) const306 virtual UBool isInert(UChar32 c) const {
307 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
308 }
309
310 const UBool onlyContiguous;
311 };
312
313 class FCDNormalizer2 : public Normalizer2WithImpl {
314 public:
FCDNormalizer2(const Normalizer2Impl & ni)315 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
316
317 private:
318 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const319 normalize(const UChar *src, const UChar *limit,
320 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
321 impl.makeFCD(src, limit, &buffer, errorCode);
322 }
323 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
324 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const325 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
326 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
327 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
328 }
329 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const330 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
331 return impl.makeFCD(src, limit, NULL, errorCode);
332 }
333 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const334 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const335 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const336 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
337 };
338
339 // instance cache ---------------------------------------------------------- ***
340
341 struct Norm2AllModes : public UMemory {
342 static Norm2AllModes *createInstance(const char *packageName,
343 const char *name,
344 UErrorCode &errorCode);
Norm2AllModesNorm2AllModes345 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
346
347 Normalizer2Impl impl;
348 ComposeNormalizer2 comp;
349 DecomposeNormalizer2 decomp;
350 FCDNormalizer2 fcd;
351 ComposeNormalizer2 fcc;
352 };
353
354 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)355 Norm2AllModes::createInstance(const char *packageName,
356 const char *name,
357 UErrorCode &errorCode) {
358 if(U_FAILURE(errorCode)) {
359 return NULL;
360 }
361 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
362 if(allModes.isNull()) {
363 errorCode=U_MEMORY_ALLOCATION_ERROR;
364 return NULL;
365 }
366 allModes->impl.load(packageName, name, errorCode);
367 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
368 }
369
370 U_CDECL_BEGIN
371 static UBool U_CALLCONV uprv_normalizer2_cleanup();
372 U_CDECL_END
373
374 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
375 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)376 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
377 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)378 Norm2AllModes *getInstance(UErrorCode &errorCode) {
379 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
380 }
381 private:
createInstance(const void * context,UErrorCode & errorCode)382 static void *createInstance(const void *context, UErrorCode &errorCode) {
383 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
384 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
385 }
386
387 const char *name;
388 };
389
390 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
391 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
392 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
393
394 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
395 public:
Norm2Singleton(SimpleSingleton & s)396 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)397 Normalizer2 *getInstance(UErrorCode &errorCode) {
398 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
399 }
400 private:
createInstance(const void *,UErrorCode & errorCode)401 static void *createInstance(const void *, UErrorCode &errorCode) {
402 Normalizer2 *noop=new NoopNormalizer2;
403 if(noop==NULL) {
404 errorCode=U_MEMORY_ALLOCATION_ERROR;
405 }
406 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
407 return noop;
408 }
409 };
410
411 STATIC_SIMPLE_SINGLETON(noopSingleton);
412
413 static UHashtable *cache=NULL;
414
415 U_CDECL_BEGIN
416
deleteNorm2AllModes(void * allModes)417 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
418 delete (Norm2AllModes *)allModes;
419 }
420
uprv_normalizer2_cleanup()421 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
422 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
423 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
424 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
425 Norm2Singleton(noopSingleton).deleteInstance();
426 uhash_close(cache);
427 cache=NULL;
428 return TRUE;
429 }
430
431 U_CDECL_END
432
getNFCInstance(UErrorCode & errorCode)433 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
434 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
435 return allModes!=NULL ? &allModes->comp : NULL;
436 }
437
getNFDInstance(UErrorCode & errorCode)438 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
439 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
440 return allModes!=NULL ? &allModes->decomp : NULL;
441 }
442
getFCDInstance(UErrorCode & errorCode)443 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
444 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
445 if(allModes!=NULL) {
446 allModes->impl.getFCDTrie(errorCode);
447 return &allModes->fcd;
448 } else {
449 return NULL;
450 }
451 }
452
getFCCInstance(UErrorCode & errorCode)453 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
454 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
455 return allModes!=NULL ? &allModes->fcc : NULL;
456 }
457
getNFKCInstance(UErrorCode & errorCode)458 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
459 Norm2AllModes *allModes=
460 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
461 return allModes!=NULL ? &allModes->comp : NULL;
462 }
463
getNFKDInstance(UErrorCode & errorCode)464 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
465 Norm2AllModes *allModes=
466 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
467 return allModes!=NULL ? &allModes->decomp : NULL;
468 }
469
getNFKC_CFInstance(UErrorCode & errorCode)470 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
471 Norm2AllModes *allModes=
472 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
473 return allModes!=NULL ? &allModes->comp : NULL;
474 }
475
getNoopInstance(UErrorCode & errorCode)476 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
477 return Norm2Singleton(noopSingleton).getInstance(errorCode);
478 }
479
480 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)481 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
482 if(U_FAILURE(errorCode)) {
483 return NULL;
484 }
485 switch(mode) {
486 case UNORM_NFD:
487 return getNFDInstance(errorCode);
488 case UNORM_NFKD:
489 return getNFKDInstance(errorCode);
490 case UNORM_NFC:
491 return getNFCInstance(errorCode);
492 case UNORM_NFKC:
493 return getNFKCInstance(errorCode);
494 case UNORM_FCD:
495 return getFCDInstance(errorCode);
496 default: // UNORM_NONE
497 return getNoopInstance(errorCode);
498 }
499 }
500
501 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)502 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
503 Norm2AllModes *allModes=
504 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
505 return allModes!=NULL ? &allModes->impl : NULL;
506 }
507
508 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)509 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
510 Norm2AllModes *allModes=
511 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
512 return allModes!=NULL ? &allModes->impl : NULL;
513 }
514
515 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)516 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
517 Norm2AllModes *allModes=
518 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
519 return allModes!=NULL ? &allModes->impl : NULL;
520 }
521
522 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)523 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
524 return &((Normalizer2WithImpl *)norm2)->impl;
525 }
526
527 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)528 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
529 Norm2AllModes *allModes=
530 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
531 if(allModes!=NULL) {
532 return allModes->impl.getFCDTrie(errorCode);
533 } else {
534 return NULL;
535 }
536 }
537
538 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)539 Normalizer2::getInstance(const char *packageName,
540 const char *name,
541 UNormalization2Mode mode,
542 UErrorCode &errorCode) {
543 if(U_FAILURE(errorCode)) {
544 return NULL;
545 }
546 if(name==NULL || *name==0) {
547 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
548 }
549 Norm2AllModes *allModes=NULL;
550 if(packageName==NULL) {
551 if(0==uprv_strcmp(name, "nfc")) {
552 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
553 } else if(0==uprv_strcmp(name, "nfkc")) {
554 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
555 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
556 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
557 }
558 }
559 if(allModes==NULL && U_SUCCESS(errorCode)) {
560 {
561 Mutex lock;
562 if(cache!=NULL) {
563 allModes=(Norm2AllModes *)uhash_get(cache, name);
564 }
565 }
566 if(allModes==NULL) {
567 LocalPointer<Norm2AllModes> localAllModes(
568 Norm2AllModes::createInstance(packageName, name, errorCode));
569 if(U_SUCCESS(errorCode)) {
570 Mutex lock;
571 if(cache==NULL) {
572 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
573 if(U_FAILURE(errorCode)) {
574 return NULL;
575 }
576 uhash_setKeyDeleter(cache, uprv_free);
577 uhash_setValueDeleter(cache, deleteNorm2AllModes);
578 }
579 void *temp=uhash_get(cache, name);
580 if(temp==NULL) {
581 int32_t keyLength=uprv_strlen(name)+1;
582 char *nameCopy=(char *)uprv_malloc(keyLength);
583 if(nameCopy==NULL) {
584 errorCode=U_MEMORY_ALLOCATION_ERROR;
585 return NULL;
586 }
587 uprv_memcpy(nameCopy, name, keyLength);
588 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
589 } else {
590 // race condition
591 allModes=(Norm2AllModes *)temp;
592 }
593 }
594 }
595 }
596 if(allModes!=NULL && U_SUCCESS(errorCode)) {
597 switch(mode) {
598 case UNORM2_COMPOSE:
599 return &allModes->comp;
600 case UNORM2_DECOMPOSE:
601 return &allModes->decomp;
602 case UNORM2_FCD:
603 allModes->impl.getFCDTrie(errorCode);
604 return &allModes->fcd;
605 case UNORM2_COMPOSE_CONTIGUOUS:
606 return &allModes->fcc;
607 default:
608 break; // do nothing
609 }
610 }
611 return NULL;
612 }
613
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2)614 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2)
615
616 U_NAMESPACE_END
617
618 // C API ------------------------------------------------------------------- ***
619
620 U_NAMESPACE_USE
621
622 U_DRAFT const UNormalizer2 * U_EXPORT2
623 unorm2_getInstance(const char *packageName,
624 const char *name,
625 UNormalization2Mode mode,
626 UErrorCode *pErrorCode) {
627 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
628 }
629
630 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)631 unorm2_close(UNormalizer2 *norm2) {
632 delete (Normalizer2 *)norm2;
633 }
634
635 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)636 unorm2_normalize(const UNormalizer2 *norm2,
637 const UChar *src, int32_t length,
638 UChar *dest, int32_t capacity,
639 UErrorCode *pErrorCode) {
640 if(U_FAILURE(*pErrorCode)) {
641 return 0;
642 }
643 if(src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) {
644 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
645 return 0;
646 }
647 UnicodeString destString(dest, 0, capacity);
648 const Normalizer2 *n2=(const Normalizer2 *)norm2;
649 if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
650 // Avoid duplicate argument checking and support NUL-terminated src.
651 const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
652 ReorderingBuffer buffer(n2wi->impl, destString);
653 if(buffer.init(length, *pErrorCode)) {
654 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
655 }
656 } else {
657 UnicodeString srcString(length<0, src, length);
658 n2->normalize(srcString, destString, *pErrorCode);
659 }
660 return destString.extract(dest, capacity, *pErrorCode);
661 }
662
663 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)664 normalizeSecondAndAppend(const UNormalizer2 *norm2,
665 UChar *first, int32_t firstLength, int32_t firstCapacity,
666 const UChar *second, int32_t secondLength,
667 UBool doNormalize,
668 UErrorCode *pErrorCode) {
669 if(U_FAILURE(*pErrorCode)) {
670 return 0;
671 }
672 if( second==NULL || secondLength<-1 ||
673 firstCapacity<0 || (first==NULL && firstCapacity>0) || firstLength<-1 ||
674 first==second
675 ) {
676 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
677 return 0;
678 }
679 UnicodeString firstString(first, firstLength, firstCapacity);
680 const Normalizer2 *n2=(const Normalizer2 *)norm2;
681 if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) {
682 // Avoid duplicate argument checking and support NUL-terminated src.
683 const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2;
684 ReorderingBuffer buffer(n2wi->impl, firstString);
685 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
686 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
687 doNormalize, buffer, *pErrorCode);
688 }
689 } else {
690 UnicodeString secondString(secondLength<0, second, secondLength);
691 if(doNormalize) {
692 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
693 } else {
694 n2->append(firstString, secondString, *pErrorCode);
695 }
696 }
697 return firstString.extract(first, firstCapacity, *pErrorCode);
698 }
699
700 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)701 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
702 UChar *first, int32_t firstLength, int32_t firstCapacity,
703 const UChar *second, int32_t secondLength,
704 UErrorCode *pErrorCode) {
705 return normalizeSecondAndAppend(norm2,
706 first, firstLength, firstCapacity,
707 second, secondLength,
708 TRUE, pErrorCode);
709 }
710
711 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)712 unorm2_append(const UNormalizer2 *norm2,
713 UChar *first, int32_t firstLength, int32_t firstCapacity,
714 const UChar *second, int32_t secondLength,
715 UErrorCode *pErrorCode) {
716 return normalizeSecondAndAppend(norm2,
717 first, firstLength, firstCapacity,
718 second, secondLength,
719 FALSE, pErrorCode);
720 }
721
722 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)723 unorm2_isNormalized(const UNormalizer2 *norm2,
724 const UChar *s, int32_t length,
725 UErrorCode *pErrorCode) {
726 if(U_FAILURE(*pErrorCode)) {
727 return 0;
728 }
729 if(s==NULL || length<-1) {
730 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
731 return 0;
732 }
733 UnicodeString sString(length<0, s, length);
734 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
735 }
736
737 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)738 unorm2_quickCheck(const UNormalizer2 *norm2,
739 const UChar *s, int32_t length,
740 UErrorCode *pErrorCode) {
741 if(U_FAILURE(*pErrorCode)) {
742 return UNORM_NO;
743 }
744 if(s==NULL || length<-1) {
745 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
746 return UNORM_NO;
747 }
748 UnicodeString sString(length<0, s, length);
749 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
750 }
751
752 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)753 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
754 const UChar *s, int32_t length,
755 UErrorCode *pErrorCode) {
756 if(U_FAILURE(*pErrorCode)) {
757 return 0;
758 }
759 if(s==NULL || length<-1) {
760 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
761 return 0;
762 }
763 UnicodeString sString(length<0, s, length);
764 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
765 }
766
767 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)768 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
769 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
770 }
771
772 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)773 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
774 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
775 }
776
777 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)778 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
779 return ((const Normalizer2 *)norm2)->isInert(c);
780 }
781
782 // Some properties APIs ---------------------------------------------------- ***
783
784 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)785 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
786 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
787 return UNORM_YES;
788 }
789 UErrorCode errorCode=U_ZERO_ERROR;
790 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
791 if(U_SUCCESS(errorCode)) {
792 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
793 } else {
794 return UNORM_MAYBE;
795 }
796 }
797
798 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)799 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
800 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
801 if(U_SUCCESS(*pErrorCode)) {
802 fcdHighStart=trie->highStart;
803 return trie->index;
804 } else {
805 return NULL;
806 }
807 }
808
809 #endif // !UCONFIG_NO_NORMALIZATION
810