• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/
13 
14 #ifndef __NORM2ALLMODES_H__
15 #define __NORM2ALLMODES_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/edits.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/stringoptions.h"
24 #include "unicode/unistr.h"
25 #include "cpputils.h"
26 #include "normalizer2impl.h"
27 
28 U_NAMESPACE_BEGIN
29 
30 // Intermediate class:
31 // Has Normalizer2Impl and does boilerplate argument checking and setup.
32 class Normalizer2WithImpl : public Normalizer2 {
33 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)34     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
35     virtual ~Normalizer2WithImpl();
36 
37     // normalize
38     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)39     normalize(const UnicodeString &src,
40               UnicodeString &dest,
41               UErrorCode &errorCode) const override {
42         if(U_FAILURE(errorCode)) {
43             dest.setToBogus();
44             return dest;
45         }
46         const char16_t *sArray=src.getBuffer();
47         if(&dest==&src || sArray==nullptr) {
48             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
49             dest.setToBogus();
50             return dest;
51         }
52         dest.remove();
53         ReorderingBuffer buffer(impl, dest);
54         if(buffer.init(src.length(), errorCode)) {
55             normalize(sArray, sArray+src.length(), buffer, errorCode);
56         }
57         return dest;
58     }
59     virtual void
60     normalize(const char16_t *src, const char16_t *limit,
61               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
62 
63     // normalize and append
64     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)65     normalizeSecondAndAppend(UnicodeString &first,
66                              const UnicodeString &second,
67                              UErrorCode &errorCode) const override {
68         return normalizeSecondAndAppend(first, second, true, errorCode);
69     }
70     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)71     append(UnicodeString &first,
72            const UnicodeString &second,
73            UErrorCode &errorCode) const override {
74         return normalizeSecondAndAppend(first, second, false, errorCode);
75     }
76     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)77     normalizeSecondAndAppend(UnicodeString &first,
78                              const UnicodeString &second,
79                              UBool doNormalize,
80                              UErrorCode &errorCode) const {
81         uprv_checkCanGetBuffer(first, errorCode);
82         if(U_FAILURE(errorCode)) {
83             return first;
84         }
85         const char16_t *secondArray=second.getBuffer();
86         if(&first==&second || secondArray==nullptr) {
87             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
88             return first;
89         }
90         int32_t firstLength=first.length();
91         UnicodeString safeMiddle;
92         {
93             ReorderingBuffer buffer(impl, first);
94             if(buffer.init(firstLength+second.length(), errorCode)) {
95                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
96                                    safeMiddle, buffer, errorCode);
97             }
98         }  // The ReorderingBuffer destructor finalizes the first string.
99         if(U_FAILURE(errorCode)) {
100             // Restore the modified suffix of the first string.
101             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
102         }
103         return first;
104     }
105     virtual void
106     normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
107                        UnicodeString &safeMiddle,
108                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
109     virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition)110     getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
111         char16_t buffer[4];
112         int32_t length;
113         const char16_t *d=impl.getDecomposition(c, buffer, length);
114         if(d==nullptr) {
115             return false;
116         }
117         if(d==buffer) {
118             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
119         } else {
120             decomposition.setTo(false, d, length);  // read-only alias
121         }
122         return true;
123     }
124     virtual UBool
getRawDecomposition(UChar32 c,UnicodeString & decomposition)125     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
126         char16_t buffer[30];
127         int32_t length;
128         const char16_t *d=impl.getRawDecomposition(c, buffer, length);
129         if(d==nullptr) {
130             return false;
131         }
132         if(d==buffer) {
133             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
134         } else {
135             decomposition.setTo(false, d, length);  // read-only alias
136         }
137         return true;
138     }
139     virtual UChar32
composePair(UChar32 a,UChar32 b)140     composePair(UChar32 a, UChar32 b) const override {
141         return impl.composePair(a, b);
142     }
143 
144     virtual uint8_t
getCombiningClass(UChar32 c)145     getCombiningClass(UChar32 c) const override {
146         return impl.getCC(impl.getNorm16(c));
147     }
148 
149     // quick checks
150     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)151     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
152         if(U_FAILURE(errorCode)) {
153             return false;
154         }
155         const char16_t *sArray=s.getBuffer();
156         if(sArray==nullptr) {
157             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
158             return false;
159         }
160         const char16_t *sLimit=sArray+s.length();
161         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
162     }
163     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)164     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
165         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
166     }
167     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)168     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
169         if(U_FAILURE(errorCode)) {
170             return 0;
171         }
172         const char16_t *sArray=s.getBuffer();
173         if(sArray==nullptr) {
174             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
175             return 0;
176         }
177         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
178     }
179     virtual const char16_t *
180     spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
181 
getQuickCheck(UChar32)182     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
183         return UNORM_YES;
184     }
185 
186     const Normalizer2Impl &impl;
187 };
188 
189 class DecomposeNormalizer2 : public Normalizer2WithImpl {
190 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)191     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
192     virtual ~DecomposeNormalizer2();
193 
194 private:
195     virtual void
normalize(const char16_t * src,const char16_t * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)196     normalize(const char16_t *src, const char16_t *limit,
197               ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
198         impl.decompose(src, limit, &buffer, errorCode);
199     }
200     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
201     virtual void
normalizeAndAppend(const char16_t * src,const char16_t * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)202     normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
203                        UnicodeString &safeMiddle,
204                        ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
205         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
206     }
207 
208     void
normalizeUTF8(uint32_t options,StringPiece src,ByteSink & sink,Edits * edits,UErrorCode & errorCode)209     normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
210                   Edits *edits, UErrorCode &errorCode) const override {
211         if (U_FAILURE(errorCode)) {
212             return;
213         }
214         if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
215             edits->reset();
216         }
217         const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
218         impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
219         sink.Flush();
220     }
221     virtual UBool
isNormalizedUTF8(StringPiece sp,UErrorCode & errorCode)222     isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
223         if(U_FAILURE(errorCode)) {
224             return false;
225         }
226         const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
227         const uint8_t *sLimit = s + sp.length();
228         return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
229     }
230 
231     virtual const char16_t *
spanQuickCheckYes(const char16_t * src,const char16_t * limit,UErrorCode & errorCode)232     spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
233         return impl.decompose(src, limit, nullptr, errorCode);
234     }
235     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)236     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
237         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
238     }
hasBoundaryBefore(UChar32 c)239     virtual UBool hasBoundaryBefore(UChar32 c) const override {
240         return impl.hasDecompBoundaryBefore(c);
241     }
hasBoundaryAfter(UChar32 c)242     virtual UBool hasBoundaryAfter(UChar32 c) const override {
243         return impl.hasDecompBoundaryAfter(c);
244     }
isInert(UChar32 c)245     virtual UBool isInert(UChar32 c) const override {
246         return impl.isDecompInert(c);
247     }
248 };
249 
250 class ComposeNormalizer2 : public Normalizer2WithImpl {
251 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)252     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
253         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
254     virtual ~ComposeNormalizer2();
255 
256 private:
257     virtual void
normalize(const char16_t * src,const char16_t * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)258     normalize(const char16_t *src, const char16_t *limit,
259               ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
260         impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
261     }
262     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
263 
264     void
normalizeUTF8(uint32_t options,StringPiece src,ByteSink & sink,Edits * edits,UErrorCode & errorCode)265     normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
266                   Edits *edits, UErrorCode &errorCode) const override {
267         if (U_FAILURE(errorCode)) {
268             return;
269         }
270         if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
271             edits->reset();
272         }
273         const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
274         impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
275                          &sink, edits, errorCode);
276         sink.Flush();
277     }
278 
279     virtual void
normalizeAndAppend(const char16_t * src,const char16_t * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)280     normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
281                        UnicodeString &safeMiddle,
282                        ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
283         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
284     }
285 
286     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)287     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
288         if(U_FAILURE(errorCode)) {
289             return false;
290         }
291         const char16_t *sArray=s.getBuffer();
292         if(sArray==nullptr) {
293             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
294             return false;
295         }
296         UnicodeString temp;
297         ReorderingBuffer buffer(impl, temp);
298         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
299             return false;
300         }
301         return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
302     }
303     virtual UBool
isNormalizedUTF8(StringPiece sp,UErrorCode & errorCode)304     isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
305         if(U_FAILURE(errorCode)) {
306             return false;
307         }
308         const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
309         return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
310     }
311     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)312     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
313         if(U_FAILURE(errorCode)) {
314             return UNORM_MAYBE;
315         }
316         const char16_t *sArray=s.getBuffer();
317         if(sArray==nullptr) {
318             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
319             return UNORM_MAYBE;
320         }
321         UNormalizationCheckResult qcResult=UNORM_YES;
322         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
323         return qcResult;
324     }
325     virtual const char16_t *
spanQuickCheckYes(const char16_t * src,const char16_t * limit,UErrorCode &)326     spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
327         return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
328     }
329     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)330     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
331         return impl.getCompQuickCheck(impl.getNorm16(c));
332     }
hasBoundaryBefore(UChar32 c)333     virtual UBool hasBoundaryBefore(UChar32 c) const override {
334         return impl.hasCompBoundaryBefore(c);
335     }
hasBoundaryAfter(UChar32 c)336     virtual UBool hasBoundaryAfter(UChar32 c) const override {
337         return impl.hasCompBoundaryAfter(c, onlyContiguous);
338     }
isInert(UChar32 c)339     virtual UBool isInert(UChar32 c) const override {
340         return impl.isCompInert(c, onlyContiguous);
341     }
342 
343     const UBool onlyContiguous;
344 };
345 
346 class FCDNormalizer2 : public Normalizer2WithImpl {
347 public:
FCDNormalizer2(const Normalizer2Impl & ni)348     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
349     virtual ~FCDNormalizer2();
350 
351 private:
352     virtual void
normalize(const char16_t * src,const char16_t * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)353     normalize(const char16_t *src, const char16_t *limit,
354               ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
355         impl.makeFCD(src, limit, &buffer, errorCode);
356     }
357     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
358     virtual void
normalizeAndAppend(const char16_t * src,const char16_t * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)359     normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
360                        UnicodeString &safeMiddle,
361                        ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
362         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
363     }
364     virtual const char16_t *
spanQuickCheckYes(const char16_t * src,const char16_t * limit,UErrorCode & errorCode)365     spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
366         return impl.makeFCD(src, limit, nullptr, errorCode);
367     }
368     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c)369     virtual UBool hasBoundaryBefore(UChar32 c) const override {
370         return impl.hasFCDBoundaryBefore(c);
371     }
hasBoundaryAfter(UChar32 c)372     virtual UBool hasBoundaryAfter(UChar32 c) const override {
373         return impl.hasFCDBoundaryAfter(c);
374     }
isInert(UChar32 c)375     virtual UBool isInert(UChar32 c) const override {
376         return impl.isFCDInert(c);
377     }
378 };
379 
380 struct Norm2AllModes : public UMemory {
Norm2AllModesNorm2AllModes381     Norm2AllModes(Normalizer2Impl *i)
382             : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
383     ~Norm2AllModes();
384 
385     static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
386     static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
387     static Norm2AllModes *createInstance(const char *packageName,
388                                          const char *name,
389                                          UErrorCode &errorCode);
390 
391     static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
392     static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
393     static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
394     static const Norm2AllModes *getNFKC_SCFInstance(UErrorCode &errorCode);
395 
396     Normalizer2Impl *impl;
397     ComposeNormalizer2 comp;
398     DecomposeNormalizer2 decomp;
399     FCDNormalizer2 fcd;
400     ComposeNormalizer2 fcc;
401 };
402 
403 U_NAMESPACE_END
404 
405 #endif  // !UCONFIG_NO_NORMALIZATION
406 #endif  // __NORM2ALLMODES_H__
407