/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/

12 #ifndef __NORM2ALLMODES_H__
13 #define __NORM2ALLMODES_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_NORMALIZATION
18 
19 #include "unicode/normalizer2.h"
20 #include "unicode/unistr.h"
21 #include "cpputils.h"
22 #include "normalizer2impl.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 // Intermediate class:
27 // Has Normalizer2Impl and does boilerplate argument checking and setup.
28 class Normalizer2WithImpl : public Normalizer2 {
29 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)30     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
31     virtual ~Normalizer2WithImpl();
32 
33     // normalize
34     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)35     normalize(const UnicodeString &src,
36               UnicodeString &dest,
37               UErrorCode &errorCode) const {
38         if(U_FAILURE(errorCode)) {
39             dest.setToBogus();
40             return dest;
41         }
42         const UChar *sArray=src.getBuffer();
43         if(&dest==&src || sArray==NULL) {
44             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
45             dest.setToBogus();
46             return dest;
47         }
48         dest.remove();
49         ReorderingBuffer buffer(impl, dest);
50         if(buffer.init(src.length(), errorCode)) {
51             normalize(sArray, sArray+src.length(), buffer, errorCode);
52         }
53         return dest;
54     }
55     virtual void
56     normalize(const UChar *src, const UChar *limit,
57               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
58 
59     // normalize and append
60     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)61     normalizeSecondAndAppend(UnicodeString &first,
62                              const UnicodeString &second,
63                              UErrorCode &errorCode) const {
64         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
65     }
66     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)67     append(UnicodeString &first,
68            const UnicodeString &second,
69            UErrorCode &errorCode) const {
70         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
71     }
72     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)73     normalizeSecondAndAppend(UnicodeString &first,
74                              const UnicodeString &second,
75                              UBool doNormalize,
76                              UErrorCode &errorCode) const {
77         uprv_checkCanGetBuffer(first, errorCode);
78         if(U_FAILURE(errorCode)) {
79             return first;
80         }
81         const UChar *secondArray=second.getBuffer();
82         if(&first==&second || secondArray==NULL) {
83             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
84             return first;
85         }
86         int32_t firstLength=first.length();
87         UnicodeString safeMiddle;
88         {
89             ReorderingBuffer buffer(impl, first);
90             if(buffer.init(firstLength+second.length(), errorCode)) {
91                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
92                                    safeMiddle, buffer, errorCode);
93             }
94         }  // The ReorderingBuffer destructor finalizes the first string.
95         if(U_FAILURE(errorCode)) {
96             // Restore the modified suffix of the first string.
97             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
98         }
99         return first;
100     }
101     virtual void
102     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
103                        UnicodeString &safeMiddle,
104                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
105     virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition)106     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
107         UChar buffer[4];
108         int32_t length;
109         const UChar *d=impl.getDecomposition(c, buffer, length);
110         if(d==NULL) {
111             return FALSE;
112         }
113         if(d==buffer) {
114             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
115         } else {
116             decomposition.setTo(FALSE, d, length);  // read-only alias
117         }
118         return TRUE;
119     }
120     virtual UBool
getRawDecomposition(UChar32 c,UnicodeString & decomposition)121     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
122         UChar buffer[30];
123         int32_t length;
124         const UChar *d=impl.getRawDecomposition(c, buffer, length);
125         if(d==NULL) {
126             return FALSE;
127         }
128         if(d==buffer) {
129             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
130         } else {
131             decomposition.setTo(FALSE, d, length);  // read-only alias
132         }
133         return TRUE;
134     }
135     virtual UChar32
composePair(UChar32 a,UChar32 b)136     composePair(UChar32 a, UChar32 b) const {
137         return impl.composePair(a, b);
138     }
139 
140     virtual uint8_t
getCombiningClass(UChar32 c)141     getCombiningClass(UChar32 c) const {
142         return impl.getCC(impl.getNorm16(c));
143     }
144 
145     // quick checks
146     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)147     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
148         if(U_FAILURE(errorCode)) {
149             return FALSE;
150         }
151         const UChar *sArray=s.getBuffer();
152         if(sArray==NULL) {
153             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
154             return FALSE;
155         }
156         const UChar *sLimit=sArray+s.length();
157         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
158     }
159     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)160     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
161         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
162     }
163     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)164     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
165         if(U_FAILURE(errorCode)) {
166             return 0;
167         }
168         const UChar *sArray=s.getBuffer();
169         if(sArray==NULL) {
170             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
171             return 0;
172         }
173         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
174     }
175     virtual const UChar *
176     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
177 
getQuickCheck(UChar32)178     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
179         return UNORM_YES;
180     }
181 
182     const Normalizer2Impl &impl;
183 };
184 
185 class DecomposeNormalizer2 : public Normalizer2WithImpl {
186 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)187     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
188     virtual ~DecomposeNormalizer2();
189 
190 private:
191     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)192     normalize(const UChar *src, const UChar *limit,
193               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
194         impl.decompose(src, limit, &buffer, errorCode);
195     }
196     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
197     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)198     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
199                        UnicodeString &safeMiddle,
200                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
201         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
202     }
203     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)204     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
205         return impl.decompose(src, limit, NULL, errorCode);
206     }
207     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)208     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
209         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
210     }
hasBoundaryBefore(UChar32 c)211     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c)212     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c)213     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
214 };
215 
216 class ComposeNormalizer2 : public Normalizer2WithImpl {
217 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)218     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
219         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
220     virtual ~ComposeNormalizer2();
221 
222 private:
223     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)224     normalize(const UChar *src, const UChar *limit,
225               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
226         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
227     }
228     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
229     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)230     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
231                        UnicodeString &safeMiddle,
232                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
233         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
234     }
235 
236     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)237     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
238         if(U_FAILURE(errorCode)) {
239             return FALSE;
240         }
241         const UChar *sArray=s.getBuffer();
242         if(sArray==NULL) {
243             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
244             return FALSE;
245         }
246         UnicodeString temp;
247         ReorderingBuffer buffer(impl, temp);
248         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
249             return FALSE;
250         }
251         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
252     }
253     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)254     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
255         if(U_FAILURE(errorCode)) {
256             return UNORM_MAYBE;
257         }
258         const UChar *sArray=s.getBuffer();
259         if(sArray==NULL) {
260             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
261             return UNORM_MAYBE;
262         }
263         UNormalizationCheckResult qcResult=UNORM_YES;
264         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
265         return qcResult;
266     }
267     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)268     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
269         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
270     }
271     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)272     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
273         return impl.getCompQuickCheck(impl.getNorm16(c));
274     }
hasBoundaryBefore(UChar32 c)275     virtual UBool hasBoundaryBefore(UChar32 c) const {
276         return impl.hasCompBoundaryBefore(c);
277     }
hasBoundaryAfter(UChar32 c)278     virtual UBool hasBoundaryAfter(UChar32 c) const {
279         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
280     }
isInert(UChar32 c)281     virtual UBool isInert(UChar32 c) const {
282         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
283     }
284 
285     const UBool onlyContiguous;
286 };
287 
288 class FCDNormalizer2 : public Normalizer2WithImpl {
289 public:
FCDNormalizer2(const Normalizer2Impl & ni)290     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
291     virtual ~FCDNormalizer2();
292 
293 private:
294     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)295     normalize(const UChar *src, const UChar *limit,
296               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
297         impl.makeFCD(src, limit, &buffer, errorCode);
298     }
299     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
300     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)301     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
302                        UnicodeString &safeMiddle,
303                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
304         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
305     }
306     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)307     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
308         return impl.makeFCD(src, limit, NULL, errorCode);
309     }
310     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c)311     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c)312     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c)313     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
314 };
315 
316 struct Norm2AllModes : public UMemory {
Norm2AllModesNorm2AllModes317     Norm2AllModes(Normalizer2Impl *i)
318             : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
319     ~Norm2AllModes();
320 
321     static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
322     static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
323     static Norm2AllModes *createInstance(const char *packageName,
324                                          const char *name,
325                                          UErrorCode &errorCode);
326 
327     static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
328     static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
329     static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
330 
331     Normalizer2Impl *impl;
332     ComposeNormalizer2 comp;
333     DecomposeNormalizer2 decomp;
334     FCDNormalizer2 fcd;
335     ComposeNormalizer2 fcc;
336 };
337 
338 U_NAMESPACE_END
339 
340 #endif  // !UCONFIG_NO_NORMALIZATION
341 #endif  // __NORM2ALLMODES_H__
342