1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31
32 U_NAMESPACE_BEGIN
33
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36 // Normalizer2 implementation for the old UNORM_NONE.
37 class NoopNormalizer2 : public Normalizer2 {
38 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_SUCCESS(errorCode)) {
43 if(&dest!=&src) {
44 dest=src;
45 } else {
46 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47 }
48 }
49 return dest;
50 }
51 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const52 normalizeSecondAndAppend(UnicodeString &first,
53 const UnicodeString &second,
54 UErrorCode &errorCode) const {
55 if(U_SUCCESS(errorCode)) {
56 if(&first!=&second) {
57 first.append(second);
58 } else {
59 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60 }
61 }
62 return first;
63 }
64 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const65 append(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
68 if(U_SUCCESS(errorCode)) {
69 if(&first!=&second) {
70 first.append(second);
71 } else {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 }
74 }
75 return first;
76 }
77 virtual UBool
getDecomposition(UChar32,UnicodeString &) const78 getDecomposition(UChar32, UnicodeString &) const {
79 return FALSE;
80 }
81 virtual UBool
isNormalized(const UnicodeString &,UErrorCode &) const82 isNormalized(const UnicodeString &, UErrorCode &) const {
83 return TRUE;
84 }
85 virtual UNormalizationCheckResult
quickCheck(const UnicodeString &,UErrorCode &) const86 quickCheck(const UnicodeString &, UErrorCode &) const {
87 return UNORM_YES;
88 }
89 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode &) const90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
91 return s.length();
92 }
hasBoundaryBefore(UChar32) const93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
hasBoundaryAfter(UChar32) const94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
isInert(UChar32) const95 virtual UBool isInert(UChar32) const { return TRUE; }
96 };
97
98 // Intermediate class:
99 // Has Normalizer2Impl and does boilerplate argument checking and setup.
100 class Normalizer2WithImpl : public Normalizer2 {
101 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
103
104 // normalize
105 virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode) const106 normalize(const UnicodeString &src,
107 UnicodeString &dest,
108 UErrorCode &errorCode) const {
109 if(U_FAILURE(errorCode)) {
110 dest.setToBogus();
111 return dest;
112 }
113 const UChar *sArray=src.getBuffer();
114 if(&dest==&src || sArray==NULL) {
115 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
116 dest.setToBogus();
117 return dest;
118 }
119 dest.remove();
120 ReorderingBuffer buffer(impl, dest);
121 if(buffer.init(src.length(), errorCode)) {
122 normalize(sArray, sArray+src.length(), buffer, errorCode);
123 }
124 return dest;
125 }
126 virtual void
127 normalize(const UChar *src, const UChar *limit,
128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
129
130 // normalize and append
131 virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const132 normalizeSecondAndAppend(UnicodeString &first,
133 const UnicodeString &second,
134 UErrorCode &errorCode) const {
135 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
136 }
137 virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode) const138 append(UnicodeString &first,
139 const UnicodeString &second,
140 UErrorCode &errorCode) const {
141 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
142 }
143 UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode) const144 normalizeSecondAndAppend(UnicodeString &first,
145 const UnicodeString &second,
146 UBool doNormalize,
147 UErrorCode &errorCode) const {
148 uprv_checkCanGetBuffer(first, errorCode);
149 if(U_FAILURE(errorCode)) {
150 return first;
151 }
152 const UChar *secondArray=second.getBuffer();
153 if(&first==&second || secondArray==NULL) {
154 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
155 return first;
156 }
157 ReorderingBuffer buffer(impl, first);
158 if(buffer.init(first.length()+second.length(), errorCode)) {
159 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
160 buffer, errorCode);
161 }
162 return first;
163 }
164 virtual void
165 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
166 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
167 virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition) const168 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
169 UChar buffer[4];
170 int32_t length;
171 const UChar *d=impl.getDecomposition(c, buffer, length);
172 if(d==NULL) {
173 return FALSE;
174 }
175 if(d==buffer) {
176 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
177 } else {
178 decomposition.setTo(FALSE, d, length); // read-only alias
179 }
180 return TRUE;
181 }
182
183 // quick checks
184 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const185 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
186 if(U_FAILURE(errorCode)) {
187 return FALSE;
188 }
189 const UChar *sArray=s.getBuffer();
190 if(sArray==NULL) {
191 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
192 return FALSE;
193 }
194 const UChar *sLimit=sArray+s.length();
195 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
196 }
197 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const198 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
199 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
200 }
201 virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode) const202 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
203 if(U_FAILURE(errorCode)) {
204 return 0;
205 }
206 const UChar *sArray=s.getBuffer();
207 if(sArray==NULL) {
208 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
209 return 0;
210 }
211 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
212 }
213 virtual const UChar *
214 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
215
getQuickCheck(UChar32) const216 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
217 return UNORM_YES;
218 }
219
220 const Normalizer2Impl &impl;
221 };
222
223 class DecomposeNormalizer2 : public Normalizer2WithImpl {
224 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)225 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
226
227 private:
228 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const229 normalize(const UChar *src, const UChar *limit,
230 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
231 impl.decompose(src, limit, &buffer, errorCode);
232 }
233 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
234 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const235 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
236 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
237 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
238 }
239 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const240 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
241 return impl.decompose(src, limit, NULL, errorCode);
242 }
243 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const244 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
245 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
246 }
hasBoundaryBefore(UChar32 c) const247 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c) const248 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c) const249 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
250 };
251
252 class ComposeNormalizer2 : public Normalizer2WithImpl {
253 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)254 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
255 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
256
257 private:
258 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const259 normalize(const UChar *src, const UChar *limit,
260 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
261 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
262 }
263 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
264 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const265 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
266 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
267 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
268 }
269
270 virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode) const271 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
272 if(U_FAILURE(errorCode)) {
273 return FALSE;
274 }
275 const UChar *sArray=s.getBuffer();
276 if(sArray==NULL) {
277 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
278 return FALSE;
279 }
280 UnicodeString temp;
281 ReorderingBuffer buffer(impl, temp);
282 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
283 return FALSE;
284 }
285 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
286 }
287 virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode) const288 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
289 if(U_FAILURE(errorCode)) {
290 return UNORM_MAYBE;
291 }
292 const UChar *sArray=s.getBuffer();
293 if(sArray==NULL) {
294 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
295 return UNORM_MAYBE;
296 }
297 UNormalizationCheckResult qcResult=UNORM_YES;
298 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
299 return qcResult;
300 }
301 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &) const302 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
303 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
304 }
305 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c) const306 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
307 return impl.getCompQuickCheck(impl.getNorm16(c));
308 }
hasBoundaryBefore(UChar32 c) const309 virtual UBool hasBoundaryBefore(UChar32 c) const {
310 return impl.hasCompBoundaryBefore(c);
311 }
hasBoundaryAfter(UChar32 c) const312 virtual UBool hasBoundaryAfter(UChar32 c) const {
313 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
314 }
isInert(UChar32 c) const315 virtual UBool isInert(UChar32 c) const {
316 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
317 }
318
319 const UBool onlyContiguous;
320 };
321
322 class FCDNormalizer2 : public Normalizer2WithImpl {
323 public:
FCDNormalizer2(const Normalizer2Impl & ni)324 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
325
326 private:
327 virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode) const328 normalize(const UChar *src, const UChar *limit,
329 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
330 impl.makeFCD(src, limit, &buffer, errorCode);
331 }
332 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
333 virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,ReorderingBuffer & buffer,UErrorCode & errorCode) const334 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
335 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
336 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
337 }
338 virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode) const339 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
340 return impl.makeFCD(src, limit, NULL, errorCode);
341 }
342 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c) const343 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c) const344 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c) const345 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
346 };
347
348 // instance cache ---------------------------------------------------------- ***
349
350 struct Norm2AllModes : public UMemory {
351 static Norm2AllModes *createInstance(const char *packageName,
352 const char *name,
353 UErrorCode &errorCode);
Norm2AllModesNorm2AllModes354 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
355
356 Normalizer2Impl impl;
357 ComposeNormalizer2 comp;
358 DecomposeNormalizer2 decomp;
359 FCDNormalizer2 fcd;
360 ComposeNormalizer2 fcc;
361 };
362
363 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)364 Norm2AllModes::createInstance(const char *packageName,
365 const char *name,
366 UErrorCode &errorCode) {
367 if(U_FAILURE(errorCode)) {
368 return NULL;
369 }
370 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
371 if(allModes.isNull()) {
372 errorCode=U_MEMORY_ALLOCATION_ERROR;
373 return NULL;
374 }
375 allModes->impl.load(packageName, name, errorCode);
376 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
377 }
378
379 U_CDECL_BEGIN
380 static UBool U_CALLCONV uprv_normalizer2_cleanup();
381 U_CDECL_END
382
383 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
384 public:
Norm2AllModesSingleton(TriStateSingleton & s,const char * n)385 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
386 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
getInstance(UErrorCode & errorCode)387 Norm2AllModes *getInstance(UErrorCode &errorCode) {
388 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
389 }
390 private:
createInstance(const void * context,UErrorCode & errorCode)391 static void *createInstance(const void *context, UErrorCode &errorCode) {
392 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
393 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
394 }
395
396 const char *name;
397 };
398
399 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
400 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
401 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
402
403 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
404 public:
Norm2Singleton(SimpleSingleton & s)405 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
getInstance(UErrorCode & errorCode)406 Normalizer2 *getInstance(UErrorCode &errorCode) {
407 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
408 }
409 private:
createInstance(const void *,UErrorCode & errorCode)410 static void *createInstance(const void *, UErrorCode &errorCode) {
411 Normalizer2 *noop=new NoopNormalizer2;
412 if(noop==NULL) {
413 errorCode=U_MEMORY_ALLOCATION_ERROR;
414 }
415 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
416 return noop;
417 }
418 };
419
420 STATIC_SIMPLE_SINGLETON(noopSingleton);
421
422 static UHashtable *cache=NULL;
423
424 U_CDECL_BEGIN
425
deleteNorm2AllModes(void * allModes)426 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
427 delete (Norm2AllModes *)allModes;
428 }
429
uprv_normalizer2_cleanup()430 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
431 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
432 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
433 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
434 Norm2Singleton(noopSingleton).deleteInstance();
435 uhash_close(cache);
436 cache=NULL;
437 return TRUE;
438 }
439
440 U_CDECL_END
441
getNFCInstance(UErrorCode & errorCode)442 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
443 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
444 return allModes!=NULL ? &allModes->comp : NULL;
445 }
446
getNFDInstance(UErrorCode & errorCode)447 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
448 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
449 return allModes!=NULL ? &allModes->decomp : NULL;
450 }
451
getFCDInstance(UErrorCode & errorCode)452 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
453 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
454 if(allModes!=NULL) {
455 allModes->impl.getFCDTrie(errorCode);
456 return &allModes->fcd;
457 } else {
458 return NULL;
459 }
460 }
461
getFCCInstance(UErrorCode & errorCode)462 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
463 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
464 return allModes!=NULL ? &allModes->fcc : NULL;
465 }
466
getNFKCInstance(UErrorCode & errorCode)467 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
468 Norm2AllModes *allModes=
469 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
470 return allModes!=NULL ? &allModes->comp : NULL;
471 }
472
getNFKDInstance(UErrorCode & errorCode)473 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
474 Norm2AllModes *allModes=
475 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
476 return allModes!=NULL ? &allModes->decomp : NULL;
477 }
478
getNFKC_CFInstance(UErrorCode & errorCode)479 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
480 Norm2AllModes *allModes=
481 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
482 return allModes!=NULL ? &allModes->comp : NULL;
483 }
484
getNoopInstance(UErrorCode & errorCode)485 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
486 return Norm2Singleton(noopSingleton).getInstance(errorCode);
487 }
488
489 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)490 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
491 if(U_FAILURE(errorCode)) {
492 return NULL;
493 }
494 switch(mode) {
495 case UNORM_NFD:
496 return getNFDInstance(errorCode);
497 case UNORM_NFKD:
498 return getNFKDInstance(errorCode);
499 case UNORM_NFC:
500 return getNFCInstance(errorCode);
501 case UNORM_NFKC:
502 return getNFKCInstance(errorCode);
503 case UNORM_FCD:
504 return getFCDInstance(errorCode);
505 default: // UNORM_NONE
506 return getNoopInstance(errorCode);
507 }
508 }
509
510 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)511 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
512 Norm2AllModes *allModes=
513 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
514 return allModes!=NULL ? &allModes->impl : NULL;
515 }
516
517 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)518 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
519 Norm2AllModes *allModes=
520 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
521 return allModes!=NULL ? &allModes->impl : NULL;
522 }
523
524 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)525 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
526 Norm2AllModes *allModes=
527 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
528 return allModes!=NULL ? &allModes->impl : NULL;
529 }
530
531 const Normalizer2Impl *
getImpl(const Normalizer2 * norm2)532 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
533 return &((Normalizer2WithImpl *)norm2)->impl;
534 }
535
536 const UTrie2 *
getFCDTrie(UErrorCode & errorCode)537 Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
538 Norm2AllModes *allModes=
539 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
540 if(allModes!=NULL) {
541 return allModes->impl.getFCDTrie(errorCode);
542 } else {
543 return NULL;
544 }
545 }
546
547 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)548 Normalizer2::getInstance(const char *packageName,
549 const char *name,
550 UNormalization2Mode mode,
551 UErrorCode &errorCode) {
552 if(U_FAILURE(errorCode)) {
553 return NULL;
554 }
555 if(name==NULL || *name==0) {
556 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
557 }
558 Norm2AllModes *allModes=NULL;
559 if(packageName==NULL) {
560 if(0==uprv_strcmp(name, "nfc")) {
561 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
562 } else if(0==uprv_strcmp(name, "nfkc")) {
563 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
564 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
565 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
566 }
567 }
568 if(allModes==NULL && U_SUCCESS(errorCode)) {
569 {
570 Mutex lock;
571 if(cache!=NULL) {
572 allModes=(Norm2AllModes *)uhash_get(cache, name);
573 }
574 }
575 if(allModes==NULL) {
576 LocalPointer<Norm2AllModes> localAllModes(
577 Norm2AllModes::createInstance(packageName, name, errorCode));
578 if(U_SUCCESS(errorCode)) {
579 Mutex lock;
580 if(cache==NULL) {
581 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
582 if(U_FAILURE(errorCode)) {
583 return NULL;
584 }
585 uhash_setKeyDeleter(cache, uprv_free);
586 uhash_setValueDeleter(cache, deleteNorm2AllModes);
587 }
588 void *temp=uhash_get(cache, name);
589 if(temp==NULL) {
590 int32_t keyLength=uprv_strlen(name)+1;
591 char *nameCopy=(char *)uprv_malloc(keyLength);
592 if(nameCopy==NULL) {
593 errorCode=U_MEMORY_ALLOCATION_ERROR;
594 return NULL;
595 }
596 uprv_memcpy(nameCopy, name, keyLength);
597 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
598 } else {
599 // race condition
600 allModes=(Norm2AllModes *)temp;
601 }
602 }
603 }
604 }
605 if(allModes!=NULL && U_SUCCESS(errorCode)) {
606 switch(mode) {
607 case UNORM2_COMPOSE:
608 return &allModes->comp;
609 case UNORM2_DECOMPOSE:
610 return &allModes->decomp;
611 case UNORM2_FCD:
612 allModes->impl.getFCDTrie(errorCode);
613 return &allModes->fcd;
614 case UNORM2_COMPOSE_CONTIGUOUS:
615 return &allModes->fcc;
616 default:
617 break; // do nothing
618 }
619 }
620 return NULL;
621 }
622
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)623 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
624
625 U_NAMESPACE_END
626
627 // C API ------------------------------------------------------------------- ***
628
629 U_NAMESPACE_USE
630
631 U_DRAFT const UNormalizer2 * U_EXPORT2
632 unorm2_getInstance(const char *packageName,
633 const char *name,
634 UNormalization2Mode mode,
635 UErrorCode *pErrorCode) {
636 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
637 }
638
639 U_DRAFT void U_EXPORT2
unorm2_close(UNormalizer2 * norm2)640 unorm2_close(UNormalizer2 *norm2) {
641 delete (Normalizer2 *)norm2;
642 }
643
644 U_DRAFT int32_t U_EXPORT2
unorm2_normalize(const UNormalizer2 * norm2,const UChar * src,int32_t length,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)645 unorm2_normalize(const UNormalizer2 *norm2,
646 const UChar *src, int32_t length,
647 UChar *dest, int32_t capacity,
648 UErrorCode *pErrorCode) {
649 if(U_FAILURE(*pErrorCode)) {
650 return 0;
651 }
652 if( (src==NULL ? length!=0 : length<-1) ||
653 (dest==NULL ? capacity!=0 : capacity<0) ||
654 (src==dest && src!=NULL)
655 ) {
656 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
657 return 0;
658 }
659 UnicodeString destString(dest, 0, capacity);
660 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
661 if(length!=0) {
662 const Normalizer2 *n2=(const Normalizer2 *)norm2;
663 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
664 if(n2wi!=NULL) {
665 // Avoid duplicate argument checking and support NUL-terminated src.
666 ReorderingBuffer buffer(n2wi->impl, destString);
667 if(buffer.init(length, *pErrorCode)) {
668 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
669 }
670 } else {
671 UnicodeString srcString(length<0, src, length);
672 n2->normalize(srcString, destString, *pErrorCode);
673 }
674 }
675 return destString.extract(dest, capacity, *pErrorCode);
676 }
677
678 static int32_t
normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UBool doNormalize,UErrorCode * pErrorCode)679 normalizeSecondAndAppend(const UNormalizer2 *norm2,
680 UChar *first, int32_t firstLength, int32_t firstCapacity,
681 const UChar *second, int32_t secondLength,
682 UBool doNormalize,
683 UErrorCode *pErrorCode) {
684 if(U_FAILURE(*pErrorCode)) {
685 return 0;
686 }
687 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
688 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
689 (firstCapacity<0 || firstLength<-1)) ||
690 (first==second && first!=NULL)
691 ) {
692 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
693 return 0;
694 }
695 UnicodeString firstString(first, firstLength, firstCapacity);
696 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
697 if(secondLength!=0) {
698 const Normalizer2 *n2=(const Normalizer2 *)norm2;
699 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
700 if(n2wi!=NULL) {
701 // Avoid duplicate argument checking and support NUL-terminated src.
702 ReorderingBuffer buffer(n2wi->impl, firstString);
703 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
704 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
705 doNormalize, buffer, *pErrorCode);
706 }
707 } else {
708 UnicodeString secondString(secondLength<0, second, secondLength);
709 if(doNormalize) {
710 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
711 } else {
712 n2->append(firstString, secondString, *pErrorCode);
713 }
714 }
715 }
716 return firstString.extract(first, firstCapacity, *pErrorCode);
717 }
718
719 U_DRAFT int32_t U_EXPORT2
unorm2_normalizeSecondAndAppend(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)720 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
721 UChar *first, int32_t firstLength, int32_t firstCapacity,
722 const UChar *second, int32_t secondLength,
723 UErrorCode *pErrorCode) {
724 return normalizeSecondAndAppend(norm2,
725 first, firstLength, firstCapacity,
726 second, secondLength,
727 TRUE, pErrorCode);
728 }
729
730 U_DRAFT int32_t U_EXPORT2
unorm2_append(const UNormalizer2 * norm2,UChar * first,int32_t firstLength,int32_t firstCapacity,const UChar * second,int32_t secondLength,UErrorCode * pErrorCode)731 unorm2_append(const UNormalizer2 *norm2,
732 UChar *first, int32_t firstLength, int32_t firstCapacity,
733 const UChar *second, int32_t secondLength,
734 UErrorCode *pErrorCode) {
735 return normalizeSecondAndAppend(norm2,
736 first, firstLength, firstCapacity,
737 second, secondLength,
738 FALSE, pErrorCode);
739 }
740
741 U_DRAFT int32_t U_EXPORT2
unorm2_getDecomposition(const UNormalizer2 * norm2,UChar32 c,UChar * decomposition,int32_t capacity,UErrorCode * pErrorCode)742 unorm2_getDecomposition(const UNormalizer2 *norm2,
743 UChar32 c, UChar *decomposition, int32_t capacity,
744 UErrorCode *pErrorCode) {
745 if(U_FAILURE(*pErrorCode)) {
746 return 0;
747 }
748 if(decomposition==NULL ? capacity!=0 : capacity<0) {
749 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
750 return 0;
751 }
752 UnicodeString destString(decomposition, 0, capacity);
753 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
754 return destString.extract(decomposition, capacity, *pErrorCode);
755 } else {
756 return -1;
757 }
758 }
759
760 U_DRAFT UBool U_EXPORT2
unorm2_isNormalized(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)761 unorm2_isNormalized(const UNormalizer2 *norm2,
762 const UChar *s, int32_t length,
763 UErrorCode *pErrorCode) {
764 if(U_FAILURE(*pErrorCode)) {
765 return 0;
766 }
767 if((s==NULL && length!=0) || length<-1) {
768 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
769 return 0;
770 }
771 UnicodeString sString(length<0, s, length);
772 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
773 }
774
775 U_DRAFT UNormalizationCheckResult U_EXPORT2
unorm2_quickCheck(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)776 unorm2_quickCheck(const UNormalizer2 *norm2,
777 const UChar *s, int32_t length,
778 UErrorCode *pErrorCode) {
779 if(U_FAILURE(*pErrorCode)) {
780 return UNORM_NO;
781 }
782 if((s==NULL && length!=0) || length<-1) {
783 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
784 return UNORM_NO;
785 }
786 UnicodeString sString(length<0, s, length);
787 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
788 }
789
790 U_DRAFT int32_t U_EXPORT2
unorm2_spanQuickCheckYes(const UNormalizer2 * norm2,const UChar * s,int32_t length,UErrorCode * pErrorCode)791 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
792 const UChar *s, int32_t length,
793 UErrorCode *pErrorCode) {
794 if(U_FAILURE(*pErrorCode)) {
795 return 0;
796 }
797 if((s==NULL && length!=0) || length<-1) {
798 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
799 return 0;
800 }
801 UnicodeString sString(length<0, s, length);
802 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
803 }
804
805 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryBefore(const UNormalizer2 * norm2,UChar32 c)806 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
807 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
808 }
809
810 U_DRAFT UBool U_EXPORT2
unorm2_hasBoundaryAfter(const UNormalizer2 * norm2,UChar32 c)811 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
812 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
813 }
814
815 U_DRAFT UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 * norm2,UChar32 c)816 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
817 return ((const Normalizer2 *)norm2)->isInert(c);
818 }
819
820 // Some properties APIs ---------------------------------------------------- ***
821
822 U_CFUNC UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)823 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
824 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
825 return UNORM_YES;
826 }
827 UErrorCode errorCode=U_ZERO_ERROR;
828 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
829 if(U_SUCCESS(errorCode)) {
830 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
831 } else {
832 return UNORM_MAYBE;
833 }
834 }
835
836 U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 & fcdHighStart,UErrorCode * pErrorCode)837 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
838 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
839 if(U_SUCCESS(*pErrorCode)) {
840 fcdHighStart=trie->highStart;
841 return trie->index;
842 } else {
843 return NULL;
844 }
845 }
846
847 #endif // !UCONFIG_NO_NORMALIZATION
848