• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (C) 2009, International Business Machines Corporation
4 * and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 /**
8  * IntlTestSpoof tests for USpoofDetector
9  */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "itspoof.h"
16 #include "unicode/uspoof.h"
17 
// TEST_ASSERT_SUCCESS(status)
//     Reports a failure through errcheckln(), giving the source file, the line and the
//     u_errorName() text, whenever U_FAILURE(status) is true. Nothing is reported otherwise.
//     The macro can only be expanded where errcheckln() resolves to a callable name.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errcheckln(status, "Failure at file %s, line %d, error = %s", \
                   __FILE__, __LINE__, u_errorName(status)); \
    } \
}
20 
// TEST_ASSERT(expr)
//     Reports a failure through errln(), quoting the source text of the expression together
//     with the file and line, when the expression evaluates to false (zero).
//     The macro can only be expanded where errln() resolves to a callable name.
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
23 
// TEST_ASSERT_EQ(a, b)
//     Reports a failure through errln() when the two integer-valued operands differ.
//     The message shows the source text and the value of both operands.
//
//     Each operand is evaluated exactly once and held in an int32_t local. Call sites in
//     this file pass function calls with side effects as operands; evaluating them again
//     for the error message would repeat those side effects and could print a value that
//     is not the one that was compared. Operands must be convertible to int32_t, which the
//     %d conversions in the message require anyway.
//     The macro can only be expanded where errln() resolves to a callable name.
#define TEST_ASSERT_EQ(a, b) { \
    const int32_t testAssertEqLhs_ = static_cast<int32_t>(a); \
    const int32_t testAssertEqRhs_ = static_cast<int32_t>(b); \
    if (testAssertEqLhs_ != testAssertEqRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
              __FILE__, __LINE__, #a, testAssertEqLhs_, #b, testAssertEqRhs_); \
    } \
}
27 
// TEST_ASSERT_NE(a, b)
//     Reports a failure through errln() when the two integer-valued operands are equal.
//     The message shows the source text and the value of both operands.
//
//     Each operand is evaluated exactly once and held in an int32_t local, so an operand
//     with side effects (for example a function call) is not executed a second time while
//     the error message is being formatted. Operands must be convertible to int32_t, which
//     the %d conversions in the message require anyway.
//     The macro can only be expanded where errln() resolves to a callable name.
#define TEST_ASSERT_NE(a, b) { \
    const int32_t testAssertNeLhs_ = static_cast<int32_t>(a); \
    const int32_t testAssertNeRhs_ = static_cast<int32_t>(b); \
    if (testAssertNeLhs_ == testAssertNeRhs_) { \
        errln("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
              __FILE__, __LINE__, #a, testAssertNeLhs_, #b, testAssertNeRhs_); \
    } \
}
31 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         A matched pair of macros that wrap the boilerplate of a single test case.
 *         TEST_SETUP opens a scope, declares "status" (initialised to U_ZERO_ERROR) and
 *         "sc" (the result of uspoof_open()), reports an open failure, and then opens an
 *         inner block that is entered only when the open succeeded.
 *         TEST_TEARDOWN closes that inner block, reports any failure left in "status",
 *         closes "sc" and closes the outer scope.
 *         Put arbitrary test code between the two; it may use "sc" and "status" directly.
 */
#define TEST_SETUP { \
    UErrorCode status = U_ZERO_ERROR; \
    USpoofChecker *sc = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
50 
51 
52 
53 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)54 void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
55 {
56     if (exec) logln("TestSuite spoof: ");
57     switch (index) {
58         case 0:
59             name = "TestSpoofAPI";
60             if (exec) {
61                 testSpoofAPI();
62             }
63             break;
64          case 1:
65             name = "TestSkeleton";
66             if (exec) {
67                 testSkeleton();
68             }
69             break;
70          case 2:
71             name = "TestAreConfusable";
72             if (exec) {
73                 testAreConfusable();
74             }
75             break;
76           case 3:
77             name = "TestInvisible";
78             if (exec) {
79                 testInvisible();
80             }
81             break;
82         default: name=""; break;
83     }
84 }
85 
testSpoofAPI()86 void IntlTestSpoof::testSpoofAPI() {
87 
88     TEST_SETUP
89         UnicodeString s("uvw");
90         int32_t position = 666;
91         int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
92         TEST_ASSERT_SUCCESS(status);
93         TEST_ASSERT_EQ(0, checkResults);
94         TEST_ASSERT_EQ(666, position);
95     TEST_TEARDOWN;
96 
97     TEST_SETUP
98         UnicodeString s1("cxs");
99         UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
100         int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
101         TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
102 
103     TEST_TEARDOWN;
104 
105     TEST_SETUP
106         UnicodeString s("I1l0O");
107         UnicodeString dest;
108         UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
109         TEST_ASSERT_SUCCESS(status);
110         TEST_ASSERT(UnicodeString("11100") == dest);
111         TEST_ASSERT(&dest == &retStr);
112     TEST_TEARDOWN;
113 }
114 
115 
// CHECK_SKELETON(type, input, expected)
//     Hands one skeleton test case to checkSkeleton(), supplying the source line of the
//     macro invocation as the final argument.
//     A variable named "sc" must be in scope where the macro is expanded.
#define CHECK_SKELETON(type, input, expected) \
    { checkSkeleton(sc, type, input, expected, __LINE__); }
119 
120 
121 // testSkeleton.   Spot check a number of confusable skeleton substitutions from the
122 //                 Unicode data file confusables.txt
123 //                 Test cases chosen for substitutions of various lengths, and
124 //                 membership in different mapping tables.
testSkeleton()125 void IntlTestSpoof::testSkeleton() {
126     const uint32_t ML = 0;
127     const uint32_t SL = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
128     const uint32_t MA = USPOOF_ANY_CASE;
129     const uint32_t SA = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
130 
131     TEST_SETUP
132         // A long "identifier" that will overflow implementation stack buffers, forcing heap allocations.
133         CHECK_SKELETON(SL, " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
134                            " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
135                            " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
136                            " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
137 
138                " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
139                " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
140                " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
141                " A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations.")
142 
143         // FC5F ;	FE74 0651 ;   ML  #* ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM to
144         //                                ARABIC KASRATAN ISOLATED FORM, ARABIC SHADDA
145         //    This character NFKD normalizes to \u0020 \u064d \u0651, so its confusable mapping
146         //    is never used in creating a skeleton.
147         CHECK_SKELETON(SL, "\\uFC5F", " \\u064d\\u0651");
148 
149         CHECK_SKELETON(SL, "nochange", "nochange");
150         CHECK_SKELETON(MA, "love", "1ove");   // lower case l to digit 1
151         CHECK_SKELETON(ML, "OOPS", "OOPS");
152         CHECK_SKELETON(MA, "OOPS", "00PS");   // Letter O to digit 0 in any case mode only
153         CHECK_SKELETON(SL, "\\u059c", "\\u0301");
154         CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
155         CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029");
156         CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647");
157 
158         // This mapping exists in the ML and MA tables, does not exist in SL, SA
159         //0C83 ;	0C03 ;	ML	# ( ಃ → ః ) KANNADA SIGN VISARGA → TELUGU SIGN VISARGA	# {source:513}
160         CHECK_SKELETON(SL, "\\u0C83", "\\u0C83");
161         CHECK_SKELETON(SA, "\\u0C83", "\\u0C83");
162         CHECK_SKELETON(ML, "\\u0C83", "\\u0C03");
163         CHECK_SKELETON(MA, "\\u0C83", "\\u0C03");
164 
165         // 0391 ; 0041 ; MA # ( Α → A ) GREEK CAPITAL LETTER ALPHA to LATIN CAPITAL LETTER A
166         // This mapping exists only in the MA table.
167         CHECK_SKELETON(MA, "\\u0391", "A");
168         CHECK_SKELETON(SA, "\\u0391", "\\u0391");
169         CHECK_SKELETON(ML, "\\u0391", "\\u0391");
170         CHECK_SKELETON(SL, "\\u0391", "\\u0391");
171 
172         // 13CF ;  0062 ;  MA  #  CHEROKEE LETTER SI to LATIN SMALL LETTER B
173         // This mapping exists in the ML and MA tables
174         CHECK_SKELETON(ML, "\\u13CF", "b");
175         CHECK_SKELETON(MA, "\\u13CF", "b");
176         CHECK_SKELETON(SL, "\\u13CF", "\\u13CF");
177         CHECK_SKELETON(SA, "\\u13CF", "\\u13CF");
178 
179         // 0022 ;  02B9 02B9 ;  SA  #*  QUOTATION MARK to MODIFIER LETTER PRIME, MODIFIER LETTER PRIME
180         // all tables.
181         CHECK_SKELETON(SL, "\\u0022", "\\u02B9\\u02B9");
182         CHECK_SKELETON(SA, "\\u0022", "\\u02B9\\u02B9");
183         CHECK_SKELETON(ML, "\\u0022", "\\u02B9\\u02B9");
184         CHECK_SKELETON(MA, "\\u0022", "\\u02B9\\u02B9");
185 
186     TEST_TEARDOWN;
187 }
188 
189 
190 //
191 //  Run a single confusable skeleton transformation test case.
192 //
checkSkeleton(const USpoofChecker * sc,uint32_t type,const char * input,const char * expected,int32_t lineNum)193 void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
194                                   const char *input, const char *expected, int32_t lineNum) {
195     UnicodeString uInput = UnicodeString(input).unescape();
196     UnicodeString uExpected = UnicodeString(expected).unescape();
197 
198     UErrorCode status = U_ZERO_ERROR;
199     UnicodeString actual;
200     uspoof_getSkeletonUnicodeString(sc, type, uInput, actual, &status);
201     if (U_FAILURE(status)) {
202         errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
203               u_errorName(status));
204         return;
205     }
206     if (uExpected != actual) {
207         errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
208                __FILE__, __LINE__, lineNum);
209         errln(UnicodeString(" Actual   Skeleton: \"") + actual + UnicodeString("\"\n") +
210               UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
211     }
212 }
213 
testAreConfusable()214 void IntlTestSpoof::testAreConfusable() {
215     TEST_SETUP
216         UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
217                          "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ");
218         UnicodeString s2("A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
219                          "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ");
220         TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
221         TEST_ASSERT_SUCCESS(status);
222 
223     TEST_TEARDOWN;
224 }
225 
testInvisible()226 void IntlTestSpoof::testInvisible() {
227     TEST_SETUP
228         UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
229         int32_t position = -42;
230         TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
231         TEST_ASSERT_SUCCESS(status);
232         TEST_ASSERT(position == -42);
233 
234         UnicodeString  s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
235         TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
236         TEST_ASSERT_SUCCESS(status);
237         TEST_ASSERT_EQ(7, position);
238 
239         // Tow acute accents, one from the composed a with acute accent, \u00e1,
240         // and one separate.
241         position = -42;
242         UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
243         TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
244         TEST_ASSERT_SUCCESS(status);
245         TEST_ASSERT_EQ(7, position);
246     TEST_TEARDOWN;
247 }
248 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
249 
250