1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2005, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/utypes.h"
8 #include "unicode/uchar.h"
9 #include "unicode/normlzr.h"
10 #include "unicode/uniset.h"
11 #include "unicode/usetiter.h"
12 #include "unicode/schriter.h"
13 #include "unormimp.h"
14 #include "tstnorm.h"
15
16 #if !UCONFIG_NO_NORMALIZATION
17
// File-local error code passed to the static Normalizer calls in
// TestNormalizerAPI() that do not use that function's own errorCode.
// NOTE(review): it keeps its value between uses, so a failure left in it is
// still there at the next call unless it is set back to U_ZERO_ERROR first.
18 static UErrorCode status = U_ZERO_ERROR;
19
20 // test APIs that are not otherwise used - improve test coverage
21 void
TestNormalizerAPI()22 BasicNormalizerTest::TestNormalizerAPI() {
23 // instantiate a Normalizer from a CharacterIterator
24 UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape();
25 s.append(s); // make s a bit longer and more interesting
26 StringCharacterIterator iter(s);
27 Normalizer norm(iter, UNORM_NFC);
28 if(norm.next()!=0xe4) {
29 errln("error in Normalizer(CharacterIterator).next()");
30 }
31
32 // test copy constructor
33 Normalizer copy(norm);
34 if(copy.next()!=0xac00) {
35 errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
36 }
37
38 // test clone(), ==, and hashCode()
39 Normalizer *clone=copy.clone();
40 if(*clone!=copy) {
41 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy");
42 }
43 // clone must have the same hashCode()
44 if(clone->hashCode()!=copy.hashCode()) {
45 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()");
46 }
47 if(clone->next()!=0x4e3d) {
48 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()");
49 }
50 // position changed, must change hashCode()
51 if(clone->hashCode()==copy.hashCode()) {
52 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()");
53 }
54 delete clone;
55 clone=0;
56
57 // test compose() and decompose()
58 UnicodeString tel, nfkc, nfkd;
59 tel=UnicodeString(1, (UChar32)0x2121, 10);
60 tel.insert(1, (UChar)0x301);
61
62 UErrorCode errorCode=U_ZERO_ERROR;
63 Normalizer::compose(tel, TRUE, 0, nfkc, errorCode);
64 Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode);
65 if(U_FAILURE(errorCode)) {
66 errln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode));
67 } else if(
68 nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() ||
69 nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape()
70 ) {
71 errln("error in Normalizer::(de)compose(): wrong result(s)");
72 }
73
74 // test setIndex()
75 norm.setIndexOnly(3);
76 if(norm.current()!=0x4e3d) {
77 errln("error in Normalizer(CharacterIterator).setIndex(3)");
78 }
79
80 // test setText(CharacterIterator) and getText()
81 UnicodeString out, out2;
82 errorCode=U_ZERO_ERROR;
83 copy.setText(iter, errorCode);
84 if(U_FAILURE(errorCode)) {
85 errln("error Normalizer::setText() failed: %s", u_errorName(errorCode));
86 } else {
87 copy.getText(out);
88 iter.getText(out2);
89 if( out!=out2 ||
90 copy.startIndex()!=iter.startIndex() ||
91 copy.endIndex()!=iter.endIndex()
92 ) {
93 errln("error in Normalizer::setText() or Normalizer::getText()");
94 }
95 }
96
97 // test setText(UChar *), getUMode() and setMode()
98 errorCode=U_ZERO_ERROR;
99 copy.setText(s.getBuffer()+1, s.length()-1, errorCode);
100 copy.setMode(UNORM_NFD);
101 if(copy.getUMode()!=UNORM_NFD) {
102 errln("error in Normalizer::setMode() or Normalizer::getUMode()");
103 }
104 if(copy.next()!=0x308 || copy.next()!=0x1100) {
105 errln("error in Normalizer::setText(UChar *) or Normalizer::setMode()");
106 }
107
108 // test setText(UChar *, length=-1)
109 errorCode=U_ZERO_ERROR;
110
111 // NUL-terminate s
112 s.append((UChar)0); // append NUL
113 s.truncate(s.length()-1); // undo length change
114
115 copy.setText(s.getBuffer()+1, -1, errorCode);
116 if(copy.endIndex()!=s.length()-1) {
117 errln("error in Normalizer::setText(UChar *, -1)");
118 }
119
120 // test setOption() and getOption()
121 copy.setOption(0xaa0000, TRUE);
122 copy.setOption(0x20000, FALSE);
123 if(!copy.getOption(0x880000) || copy.getOption(0x20000)) {
124 errln("error in Normalizer::setOption() or Normalizer::getOption()");
125 }
126
127 // test last()/previous() with an internal buffer overflow
128 errorCode=U_ZERO_ERROR;
129 copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode);
130 if(copy.last()!=0x308) {
131 errln("error in Normalizer(1000*U+0308).last()");
132 }
133
134 // test UNORM_NONE
135 norm.setMode(UNORM_NONE);
136 if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
137 errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
138 }
139 Normalizer::normalize(s, UNORM_NONE, 0, out, status);
140 if(out!=s) {
141 errln("error in Normalizer::normalize(UNORM_NONE)");
142 }
143
144 // test that the same string can be used as source and destination
145 s.setTo((UChar)0xe4);
146 Normalizer::normalize(s, UNORM_NFD, 0, s, status);
147 if(s.charAt(1)!=0x308) {
148 errln("error in Normalizer::normalize(UNORM_NFD, self)");
149 }
150 Normalizer::normalize(s, UNORM_NFC, 0, s, status);
151 if(s.charAt(0)!=0xe4) {
152 errln("error in Normalizer::normalize(UNORM_NFC, self)");
153 }
154 Normalizer::decompose(s, FALSE, 0, s, status);
155 if(s.charAt(1)!=0x308) {
156 errln("error in Normalizer::decompose(self)");
157 }
158 Normalizer::compose(s, FALSE, 0, s, status);
159 if(s.charAt(0)!=0xe4) {
160 errln("error in Normalizer::compose(self)");
161 }
162 Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status);
163 if(s.charAt(1)!=0xe4) {
164 errln("error in Normalizer::decompose(self)");
165 }
166
167 // test internal normalization exclusion options
168 // s contains a compatibility CJK character and a Hangul syllable
169 s=UnicodeString("a\\uFACE\\uD7A3b", -1, US_INV).unescape();
170 status=U_ZERO_ERROR;
171 Normalizer::decompose(s, FALSE, UNORM_NX_HANGUL, out, status);
172 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\u9F9C\\uD7A3b").unescape()) {
173 errln("Normalizer::decompose(UNORM_NX_HANGUL) failed - %s", u_errorName(status));
174 }
175 status=U_ZERO_ERROR;
176 Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT, out, status);
177 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\u1112\\u1175\\u11c2b").unescape()) {
178 errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT) failed - %s", u_errorName(status));
179 }
180 status=U_ZERO_ERROR;
181 Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL, out, status);
182 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\uD7A3b").unescape()) {
183 errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL) failed - %s", u_errorName(status));
184 }
185 }
186
187 #endif
188