1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31
32 U_NAMESPACE_BEGIN
33
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_SUCCESS(errorCode)) {
43 if(&dest!=&src) {
44 dest=src;
45 } else {
46 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47 }
48 }
49 return dest;
50 }
51 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52 normalizeSecondAndAppend(UnicodeString &first,
53 const UnicodeString &second,
54 UErrorCode &errorCode) const {
55 if(U_SUCCESS(errorCode)) {
56 if(&first!=&second) {
57 first.append(second);
58 } else {
59 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60 }
61 }
62 return first;
63 }
64 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65 append(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
68 if(U_SUCCESS(errorCode)) {
69 if(&first!=&second) {
70 first.append(second);
71 } else {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 }
74 }
75 return first;
76 }
77 virtual UBool
getDecomposition(UChar32,UnicodeString &) const78 getDecomposition(UChar32, UnicodeString &) const {
79 return FALSE;
80 }
81 virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const82 isNormalized(const UnicodeString &, UErrorCode &) const {
83 return TRUE;
84 }
85 virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const86 quickCheck(const UnicodeString &, UErrorCode &) const {
87 return UNORM_YES;
88 }
89 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
91 return s.length();
92 }
hasBoundaryBefore(UChar32) const93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const95 virtual UBool isInert(UChar32) const { return TRUE; }
96 };
97
98 // Intermediate class:
99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
100 class Normalizer2WithImpl : public Normalizer2 {
101 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
103
104 // normalize
105 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const106 normalize(const UnicodeString &src,
107 UnicodeString &dest,
108 UErrorCode &errorCode) const {
109 if(U_FAILURE(errorCode)) {
110 dest.setToBogus();
111 return dest;
112 }
113 const UChar *sArray=src.getBuffer();
114 if(&dest==&src || sArray==NULL) {
115 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
116 dest.setToBogus();
117 return dest;
118 }
119 dest.remove();
120 ReorderingBuffer buffer(impl, dest);
121 if(buffer.init(src.length(), errorCode)) {
122 normalize(sArray, sArray+src.length(), buffer, errorCode);
123 }
124 return dest;
125 }
126 virtual void
127 normalize(const UChar *src, const UChar *limit,
128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
129
130 // normalize and append
131 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const132 normalizeSecondAndAppend(UnicodeString &first,
133 const UnicodeString &second,
134 UErrorCode &errorCode) const {
135 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
136 }
137 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const138 append(UnicodeString &first,
139 const UnicodeString &second,
140 UErrorCode &errorCode) const {
141 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
142 }
143 UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const144 normalizeSecondAndAppend(UnicodeString &first,
145 const UnicodeString &second,
146 UBool doNormalize,
147 UErrorCode &errorCode) const {
148 uprv_checkCanGetBuffer(first, errorCode);
149 if(U_FAILURE(errorCode)) {
150 return first;
151 }
152 const UChar *secondArray=second.getBuffer();
153 if(&first==&second || secondArray==NULL) {
154 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
155 return first;
156 }
157 int32_t firstLength=first.length();
158 UnicodeString safeMiddle;
159 {
160 ReorderingBuffer buffer(impl, first);
161 if(buffer.init(firstLength+second.length(), errorCode)) {
162 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
163 safeMiddle, buffer, errorCode);
164 }
165 } // The ReorderingBuffer destructor finalizes the first string.
166 if(U_FAILURE(errorCode)) {
167 // Restore the modified suffix of the first string.
168 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
169 }
170 return first;
171 }
172 virtual void
173 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
174 UnicodeString &safeMiddle,
175 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
176 virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition) const177 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
178 UChar buffer[4];
179 int32_t length;
180 const UChar *d=impl.getDecomposition(c, buffer, length);
181 if(d==NULL) {
182 return FALSE;
183 }
184 if(d==buffer) {
185 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
186 } else {
187 decomposition.setTo(FALSE, d, length); // read-only alias
188 }
189 return TRUE;
190 }
191
192 // quick checks
193 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const194 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
195 if(U_FAILURE(errorCode)) {
196 return FALSE;
197 }
198 const UChar *sArray=s.getBuffer();
199 if(sArray==NULL) {
200 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
201 return FALSE;
202 }
203 const UChar *sLimit=sArray+s.length();
204 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
205 }
206 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const207 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
208 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
209 }
210 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const211 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
212 if(U_FAILURE(errorCode)) {
213 return 0;
214 }
215 const UChar *sArray=s.getBuffer();
216 if(sArray==NULL) {
217 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
218 return 0;
219 }
220 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
221 }
222 virtual const UChar *
223 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
224
getQuickCheck(UChar32) const225 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
226 return UNORM_YES;
227 }
228
229 const Normalizer2Impl &impl;
230 };
231
232 class DecomposeNormalizer2 : public Normalizer2WithImpl {
233 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)234 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
235
236 private:
237 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const238 normalize(const UChar *src, const UChar *limit,
239 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
240 impl.decompose(src, limit, &buffer, errorCode);
241 }
242 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
243 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const244 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
245 UnicodeString &safeMiddle,
246 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
247 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
248 }
249 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const250 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
251 return impl.decompose(src, limit, NULL, errorCode);
252 }
253 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const254 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
255 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
256 }
hasBoundaryBefore(UChar32 c) const257 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const258 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const259 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
260 };
261
262 class ComposeNormalizer2 : public Normalizer2WithImpl {
263 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)264 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
265 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
266
267 private:
268 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const269 normalize(const UChar *src, const UChar *limit,
270 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
271 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
272 }
273 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
274 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const275 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
276 UnicodeString &safeMiddle,
277 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
278 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
279 }
280
281 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const282 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
283 if(U_FAILURE(errorCode)) {
284 return FALSE;
285 }
286 const UChar *sArray=s.getBuffer();
287 if(sArray==NULL) {
288 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
289 return FALSE;
290 }
291 UnicodeString temp;
292 ReorderingBuffer buffer(impl, temp);
293 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
294 return FALSE;
295 }
296 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
297 }
298 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const299 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
300 if(U_FAILURE(errorCode)) {
301 return UNORM_MAYBE;
302 }
303 const UChar *sArray=s.getBuffer();
304 if(sArray==NULL) {
305 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
306 return UNORM_MAYBE;
307 }
308 UNormalizationCheckResult qcResult=UNORM_YES;
309 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
310 return qcResult;
311 }
312 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const313 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
314 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
315 }
316 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const317 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
318 return impl.getCompQuickCheck(impl.getNorm16(c));
319 }
hasBoundaryBefore(UChar32 c) const320 virtual UBool hasBoundaryBefore(UChar32 c) const {
321 return impl.hasCompBoundaryBefore(c);
322 }
hasBoundaryAfter(UChar32 c) const323 virtual UBool hasBoundaryAfter(UChar32 c) const {
324 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
325 }
isInert(UChar32 c) const326 virtual UBool isInert(UChar32 c) const {
327 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
328 }
329
330 const UBool onlyContiguous;
331 };
332
333 class FCDNormalizer2 : public Normalizer2WithImpl {
334 public:
FCDNormalizer2(const Normalizer2Impl & ni)335 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
336
337 private:
338 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const339 normalize(const UChar *src, const UChar *limit,
340 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
341 impl.makeFCD(src, limit, &buffer, errorCode);
342 }
343 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
344 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode) const345 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
346 UnicodeString &safeMiddle,
347 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
348 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
349 }
350 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const351 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
352 return impl.makeFCD(src, limit, NULL, errorCode);
353 }
354 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const355 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const356 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const357 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
358 };
359
360 // instance cache ---------------------------------------------------------- ***
361
362 struct Norm2AllModes : public UMemory {
363 static Norm2AllModes *createInstance(const char *packageName,
364 const char *name,
365 UErrorCode &errorCode);
Norm2AllModesNorm2AllModes366 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
367
368 Normalizer2Impl impl;
369 ComposeNormalizer2 comp;
370 DecomposeNormalizer2 decomp;
371 FCDNormalizer2 fcd;
372 ComposeNormalizer2 fcc;
373 };
374
375 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)376 Norm2AllModes::createInstance(const char *packageName,
377 const char *name,
378 UErrorCode &errorCode) {
379 if(U_FAILURE(errorCode)) {
380 return NULL;
381 }
382 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
383 if(allModes.isNull()) {
384 errorCode=U_MEMORY_ALLOCATION_ERROR;
385 return NULL;
386 }
387 allModes->impl.load(packageName, name, errorCode);
388 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
389 }
390
// Forward declaration: the singleton createInstance() functions below pass this
// cleanup function to ucln_common_registerCleanup() before it is defined.
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
394
395 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
396 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)397 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
398 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)399 Norm2AllModes *getInstance(UErrorCode &errorCode) {
400 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
401 }
402 private:
createInstance(const void * context,UErrorCode & errorCode)403 static void *createInstance(const void *context, UErrorCode &errorCode) {
404 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
405 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
406 }
407
408 const char *name;
409 };
410
// Singletons for the built-in data files; elsewhere in this file they are
// paired with the names "nfc", "nfkc" and "nfkc_cf" respectively.
STATIC_TRI_STATE_SINGLETON(nfcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
414
415 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
416 public:
Norm2Singleton(SimpleSingleton & s)417 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)418 Normalizer2 *getInstance(UErrorCode &errorCode) {
419 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
420 }
421 private:
createInstance(const void *,UErrorCode & errorCode)422 static void *createInstance(const void *, UErrorCode &errorCode) {
423 Normalizer2 *noop=new NoopNormalizer2;
424 if(noop==NULL) {
425 errorCode=U_MEMORY_ALLOCATION_ERROR;
426 }
427 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
428 return noop;
429 }
430 };
431
// Singleton used by Norm2Singleton for the no-op normalizer.
STATIC_SIMPLE_SINGLETON(noopSingleton);

// Hashtable of Norm2AllModes objects keyed by data file name.
// Created on demand in Normalizer2::getInstance() (accessed there under a Mutex)
// and closed in uprv_normalizer2_cleanup().
static UHashtable *cache=NULL;
435
436 U_CDECL_BEGIN
437
deleteNorm2AllModes(void * allModes)438 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
439 delete (Norm2AllModes *)allModes;
440 }
441
uprv_normalizer2_cleanup()442 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
443 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
444 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
445 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
446 Norm2Singleton(noopSingleton).deleteInstance();
447 uhash_close(cache);
448 cache=NULL;
449 return TRUE;
450 }
451
452 U_CDECL_END
453
getNFCInstance(UErrorCode & errorCode)454 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
455 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
456 return allModes!=NULL ? &allModes->comp : NULL;
457 }
458
getNFDInstance(UErrorCode & errorCode)459 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
460 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
461 return allModes!=NULL ? &allModes->decomp : NULL;
462 }
463
getFCDInstance(UErrorCode & errorCode)464 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
465 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
466 if(allModes!=NULL) {
467 allModes->impl.getFCDTrie(errorCode);
468 return &allModes->fcd;
469 } else {
470 return NULL;
471 }
472 }
473
getFCCInstance(UErrorCode & errorCode)474 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
475 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
476 return allModes!=NULL ? &allModes->fcc : NULL;
477 }
478
getNFKCInstance(UErrorCode & errorCode)479 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
480 Norm2AllModes *allModes=
481 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
482 return allModes!=NULL ? &allModes->comp : NULL;
483 }
484
getNFKDInstance(UErrorCode & errorCode)485 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
486 Norm2AllModes *allModes=
487 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
488 return allModes!=NULL ? &allModes->decomp : NULL;
489 }
490
getNFKC_CFInstance(UErrorCode & errorCode)491 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
492 Norm2AllModes *allModes=
493 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
494 return allModes!=NULL ? &allModes->comp : NULL;
495 }
496
getNoopInstance(UErrorCode & errorCode)497 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
498 return Norm2Singleton(noopSingleton).getInstance(errorCode);
499 }
500
501 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)502 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
503 if(U_FAILURE(errorCode)) {
504 return NULL;
505 }
506 switch(mode) {
507 case UNORM_NFD:
508 return getNFDInstance(errorCode);
509 case UNORM_NFKD:
510 return getNFKDInstance(errorCode);
511 case UNORM_NFC:
512 return getNFCInstance(errorCode);
513 case UNORM_NFKC:
514 return getNFKCInstance(errorCode);
515 case UNORM_FCD:
516 return getFCDInstance(errorCode);
517 default: // UNORM_NONE
518 return getNoopInstance(errorCode);
519 }
520 }
521
522 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)523 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
524 Norm2AllModes *allModes=
525 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
526 return allModes!=NULL ? &allModes->impl : NULL;
527 }
528
529 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)530 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
531 Norm2AllModes *allModes=
532 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
533 return allModes!=NULL ? &allModes->impl : NULL;
534 }
535
536 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)537 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
538 Norm2AllModes *allModes=
539 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
540 return allModes!=NULL ? &allModes->impl : NULL;
541 }
542
543 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)544 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
545 return &((Normalizer2WithImpl *)norm2)->impl;
546 }
547
548 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)549 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
550 Norm2AllModes *allModes=
551 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
552 if(allModes!=NULL) {
553 return allModes->impl.getFCDTrie(errorCode);
554 } else {
555 return NULL;
556 }
557 }
558
559 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)560 Normalizer2::getInstance(const char *packageName,
561 const char *name,
562 UNormalization2Mode mode,
563 UErrorCode &errorCode) {
564 if(U_FAILURE(errorCode)) {
565 return NULL;
566 }
567 if(name==NULL || *name==0) {
568 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
569 }
570 Norm2AllModes *allModes=NULL;
571 if(packageName==NULL) {
572 if(0==uprv_strcmp(name, "nfc")) {
573 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
574 } else if(0==uprv_strcmp(name, "nfkc")) {
575 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
576 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
577 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
578 }
579 }
580 if(allModes==NULL && U_SUCCESS(errorCode)) {
581 {
582 Mutex lock;
583 if(cache!=NULL) {
584 allModes=(Norm2AllModes *)uhash_get(cache, name);
585 }
586 }
587 if(allModes==NULL) {
588 LocalPointer<Norm2AllModes> localAllModes(
589 Norm2AllModes::createInstance(packageName, name, errorCode));
590 if(U_SUCCESS(errorCode)) {
591 Mutex lock;
592 if(cache==NULL) {
593 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
594 if(U_FAILURE(errorCode)) {
595 return NULL;
596 }
597 uhash_setKeyDeleter(cache, uprv_free);
598 uhash_setValueDeleter(cache, deleteNorm2AllModes);
599 }
600 void *temp=uhash_get(cache, name);
601 if(temp==NULL) {
602 int32_t keyLength=uprv_strlen(name)+1;
603 char *nameCopy=(char *)uprv_malloc(keyLength);
604 if(nameCopy==NULL) {
605 errorCode=U_MEMORY_ALLOCATION_ERROR;
606 return NULL;
607 }
608 uprv_memcpy(nameCopy, name, keyLength);
609 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
610 } else {
611 // race condition
612 allModes=(Norm2AllModes *)temp;
613 }
614 }
615 }
616 }
617 if(allModes!=NULL && U_SUCCESS(errorCode)) {
618 switch(mode) {
619 case UNORM2_COMPOSE:
620 return &allModes->comp;
621 case UNORM2_DECOMPOSE:
622 return &allModes->decomp;
623 case UNORM2_FCD:
624 allModes->impl.getFCDTrie(errorCode);
625 return &allModes->fcd;
626 case UNORM2_COMPOSE_CONTIGUOUS:
627 return &allModes->fcc;
628 default:
629 break; // do nothing
630 }
631 }
632 return NULL;
633 }
634
// UObject boilerplate for Normalizer2, generated by the ICU macro.
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
636
637 U_NAMESPACE_END
638
639 // C API ------------------------------------------------------------------- ***
640
641 U_NAMESPACE_USE
642
643 U_DRAFT const UNormalizer2 * U_EXPORT2
644 unorm2_getInstance(const char *packageName,
645 const char *name,
646 UNormalization2Mode mode,
647 UErrorCode *pErrorCode) {
648 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
649 }
650
651 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)652 unorm2_close(UNormalizer2 *norm2) {
653 delete (Normalizer2 *)norm2;
654 }
655
656 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)657 unorm2_normalize(const UNormalizer2 *norm2,
658 const UChar *src, int32_t length,
659 UChar *dest, int32_t capacity,
660 UErrorCode *pErrorCode) {
661 if(U_FAILURE(*pErrorCode)) {
662 return 0;
663 }
664 if( (src==NULL ? length!=0 : length<-1) ||
665 (dest==NULL ? capacity!=0 : capacity<0) ||
666 (src==dest && src!=NULL)
667 ) {
668 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
669 return 0;
670 }
671 UnicodeString destString(dest, 0, capacity);
672 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
673 if(length!=0) {
674 const Normalizer2 *n2=(const Normalizer2 *)norm2;
675 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
676 if(n2wi!=NULL) {
677 // Avoid duplicate argument checking and support NUL-terminated src.
678 ReorderingBuffer buffer(n2wi->impl, destString);
679 if(buffer.init(length, *pErrorCode)) {
680 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
681 }
682 } else {
683 UnicodeString srcString(length<0, src, length);
684 n2->normalize(srcString, destString, *pErrorCode);
685 }
686 }
687 return destString.extract(dest, capacity, *pErrorCode);
688 }
689
690 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)691 normalizeSecondAndAppend(const UNormalizer2 *norm2,
692 UChar *first, int32_t firstLength, int32_t firstCapacity,
693 const UChar *second, int32_t secondLength,
694 UBool doNormalize,
695 UErrorCode *pErrorCode) {
696 if(U_FAILURE(*pErrorCode)) {
697 return 0;
698 }
699 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
700 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
701 (firstCapacity<0 || firstLength<-1)) ||
702 (first==second && first!=NULL)
703 ) {
704 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
705 return 0;
706 }
707 UnicodeString firstString(first, firstLength, firstCapacity);
708 firstLength=firstString.length(); // In case it was -1.
709 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
710 if(secondLength!=0) {
711 const Normalizer2 *n2=(const Normalizer2 *)norm2;
712 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
713 if(n2wi!=NULL) {
714 // Avoid duplicate argument checking and support NUL-terminated src.
715 UnicodeString safeMiddle;
716 {
717 ReorderingBuffer buffer(n2wi->impl, firstString);
718 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
719 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
720 doNormalize, safeMiddle, buffer, *pErrorCode);
721 }
722 } // The ReorderingBuffer destructor finalizes firstString.
723 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
724 // Restore the modified suffix of the first string.
725 // This does not restore first[] array contents between firstLength and firstCapacity.
726 // (That might be uninitialized memory, as far as we know.)
727 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
728 if(firstLength<firstCapacity) {
729 first[firstLength]=0; // NUL-terminate in case it was originally.
730 }
731 }
732 } else {
733 UnicodeString secondString(secondLength<0, second, secondLength);
734 if(doNormalize) {
735 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
736 } else {
737 n2->append(firstString, secondString, *pErrorCode);
738 }
739 }
740 }
741 return firstString.extract(first, firstCapacity, *pErrorCode);
742 }
743
744 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)745 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
746 UChar *first, int32_t firstLength, int32_t firstCapacity,
747 const UChar *second, int32_t secondLength,
748 UErrorCode *pErrorCode) {
749 return normalizeSecondAndAppend(norm2,
750 first, firstLength, firstCapacity,
751 second, secondLength,
752 TRUE, pErrorCode);
753 }
754
755 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)756 unorm2_append(const UNormalizer2 *norm2,
757 UChar *first, int32_t firstLength, int32_t firstCapacity,
758 const UChar *second, int32_t secondLength,
759 UErrorCode *pErrorCode) {
760 return normalizeSecondAndAppend(norm2,
761 first, firstLength, firstCapacity,
762 second, secondLength,
763 FALSE, pErrorCode);
764 }
765
766 U_DRAFT int32_t U_EXPORT2
unorm2_getDecomposition(const UNormalizer2 * norm2,UChar32 c,UChar * decomposition,int32_t capacity,UErrorCode * pErrorCode)767 unorm2_getDecomposition(const UNormalizer2 *norm2,
768 UChar32 c, UChar *decomposition, int32_t capacity,
769 UErrorCode *pErrorCode) {
770 if(U_FAILURE(*pErrorCode)) {
771 return 0;
772 }
773 if(decomposition==NULL ? capacity!=0 : capacity<0) {
774 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
775 return 0;
776 }
777 UnicodeString destString(decomposition, 0, capacity);
778 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
779 return destString.extract(decomposition, capacity, *pErrorCode);
780 } else {
781 return -1;
782 }
783 }
784
785 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)786 unorm2_isNormalized(const UNormalizer2 *norm2,
787 const UChar *s, int32_t length,
788 UErrorCode *pErrorCode) {
789 if(U_FAILURE(*pErrorCode)) {
790 return 0;
791 }
792 if((s==NULL && length!=0) || length<-1) {
793 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
794 return 0;
795 }
796 UnicodeString sString(length<0, s, length);
797 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
798 }
799
800 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)801 unorm2_quickCheck(const UNormalizer2 *norm2,
802 const UChar *s, int32_t length,
803 UErrorCode *pErrorCode) {
804 if(U_FAILURE(*pErrorCode)) {
805 return UNORM_NO;
806 }
807 if((s==NULL && length!=0) || length<-1) {
808 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
809 return UNORM_NO;
810 }
811 UnicodeString sString(length<0, s, length);
812 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
813 }
814
815 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)816 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
817 const UChar *s, int32_t length,
818 UErrorCode *pErrorCode) {
819 if(U_FAILURE(*pErrorCode)) {
820 return 0;
821 }
822 if((s==NULL && length!=0) || length<-1) {
823 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
824 return 0;
825 }
826 UnicodeString sString(length<0, s, length);
827 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
828 }
829
830 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)831 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
832 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
833 }
834
835 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)836 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
837 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
838 }
839
840 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)841 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
842 return ((const Normalizer2 *)norm2)->isInert(c);
843 }
844
845 // Some properties APIs ---------------------------------------------------- ***
846
847 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)848 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
849 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
850 return UNORM_YES;
851 }
852 UErrorCode errorCode=U_ZERO_ERROR;
853 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
854 if(U_SUCCESS(errorCode)) {
855 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
856 } else {
857 return UNORM_MAYBE;
858 }
859 }
860
861 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)862 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
863 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
864 if(U_SUCCESS(*pErrorCode)) {
865 fcdHighStart=trie->highStart;
866 return trie->index;
867 } else {
868 return NULL;
869 }
870 }
871
872 #endif // !UCONFIG_NO_NORMALIZATION
873