• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2009-2012, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File spooftest.c
9 *
10 *********************************************************************************/
11 /*C API TEST for the uspoof Unicode Identifier Spoofing and Security API */
12 /**
13 *   This is an API test for ICU spoof detection in plain C.  It doesn't test very many cases, and doesn't
14 *   try to test the full functionality.  It just calls each function and verifies that it
15 *   works on a basic level.
16 *
17 *   More complete testing of spoof detection functionality is done with the C++ tests.
18 **/
19 
20 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include "unicode/uspoof.h"
27 #include "unicode/ustring.h"
28 #include "unicode/uset.h"
29 #include "cintltst.h"
30 
/*
 * Assertion helpers for this test file.
 *
 * Every macro expands to a bare { ... } block (deliberately not
 * do { } while (0)), so an invocation forms a complete statement whether or
 * not it is followed by a semicolon.  A failed check is only logged, with the
 * source file and line of the invocation; control then continues with the
 * next statement.
 */

/* Log a failure if 'status' holds an ICU error code. */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_err_status(status, "Failure at file %s, line %d, error = %s\n", \
                       __FILE__, __LINE__, u_errorName(status)); \
    } \
}

/* Log a failure, quoting the expression text, if 'expr' compares equal to FALSE. */
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}

/*
 * Log a failure if 'a' and 'b' differ.  Both values are printed with %d, and
 * each argument is evaluated a second time when the message is produced.
 */
#define TEST_ASSERT_EQ(a, b) { \
    if ((a) != (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}

/*
 * Log a failure if 'a' and 'b' are equal.  Both values are printed with %d, and
 * each argument is evaluated a second time when the message is produced.
 */
#define TEST_ASSERT_NE(a, b) { \
    if ((a) == (b)) { \
        log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
                __FILE__, __LINE__, #a, (a), #b, (b)); \
    } \
}
44 
45 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Bracket the body of one test case.  TEST_SETUP opens a new scope,
 *         declares "status" (initially U_ZERO_ERROR) and "sc", a spoof checker
 *         obtained from uspoof_open(), and then opens an inner block that is
 *         entered only when the open succeeded.  Arbitrary test code, including
 *         local declarations, goes between the two macros.
 *         TEST_TEARDOWN closes the inner block, reports any error left in
 *         "status", closes "sc" and closes the outer scope.
 */
#define TEST_SETUP { \
    UErrorCode     status = U_ZERO_ERROR; \
    USpoofChecker *sc     = uspoof_open(&status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uspoof_close(sc); \
}
64 
65 
66 static void TestUSpoofCAPI(void);
67 
68 void addUSpoofTest(TestNode** root);
69 
addUSpoofTest(TestNode ** root)70 void addUSpoofTest(TestNode** root)
71 {
72 #if !UCONFIG_NO_FILE_IO
73     addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI");
74 #endif
75 }
76 
77 /*
78  *  Identifiers for verifying that spoof checking is minimally alive and working.
79  */
80 const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0};    /* "uz", all ASCII             */
81                                                             /*   (not confusable)          */
82 const UChar scMixed[]  = {(UChar)0x73, (UChar)0x0441, 0};   /* "sc", with Cyrillic 'c'     */
83                                                             /*   (mixed script, confusable */
84 
85 const UChar scLatin[]  = {(UChar)0x73,  (UChar)0x63, 0};    /* "sc", plain ascii.        */
86 const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0};   /* Plain lower case Cyrillic letters,
87                                                                no latin confusables         */
88 
89 const UChar goodGreek[]   = {(UChar)0x3c0, (UChar)0x3c6, 0};   /* Plain lower case Greek letters */
90 
91 const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0};   /* lI1, all ASCII */
92 
93                              /*  Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/
94 const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};
95 
96 const UChar lll_Cyrl[]    = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};
97 
98 /* The skeleton transform for all of thes 'lll' lookalikes is all lower case l. */
99 const UChar lll_Skel[]    = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};
100 
101 const UChar han_Hiragana[] = {(UChar)0x3086, (UChar)0x308A, (UChar)0x0020, (UChar)0x77F3, (UChar)0x7530, 0};
102 
103 /* Provide better code coverage */
104 const char goodLatinUTF8[]    = {0x75, 0x77, 0};
105 /*
106  *   Spoof Detction C API Tests
107  */
TestUSpoofCAPI(void)108 static void TestUSpoofCAPI(void) {
109 
110     /*
111      *  basic uspoof_open().
112      */
113     {
114         USpoofChecker *sc;
115         UErrorCode  status = U_ZERO_ERROR;
116         sc = uspoof_open(&status);
117         TEST_ASSERT_SUCCESS(status);
118         if (U_FAILURE(status)) {
119             /* If things are so broken that we can't even open a default spoof checker,  */
120             /*   don't even try the rest of the tests.  They would all fail.             */
121             return;
122         }
123         uspoof_close(sc);
124     }
125 
126 
127 
128     /*
129      *  Test Open from source rules.
130     */
131     TEST_SETUP
132     const char *dataSrcDir;
133     char       *fileName;
134     char       *confusables;
135     int         confusablesLength;
136     char       *confusablesWholeScript;
137     int         confusablesWholeScriptLength;
138     FILE       *f;
139     UParseError pe;
140     int32_t     errType;
141     USpoofChecker *rsc;
142 
143     dataSrcDir = ctest_dataSrcDir();
144     fileName = malloc(strlen(dataSrcDir) + 100);
145     strcpy(fileName, dataSrcDir);
146     strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
147     f = fopen(fileName, "rb");
148     TEST_ASSERT_NE(f, NULL);
149     confusables = malloc(3000000);
150     if (f != NULL) {
151     confusablesLength = fread(confusables, 1, 3000000, f);
152     fclose(f);
153     }
154 
155     strcpy(fileName, dataSrcDir);
156     strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
157     f = fopen(fileName, "rb");
158     TEST_ASSERT_NE(f, NULL);
159     confusablesWholeScript = malloc(1000000);
160     if (f != NULL) {
161     confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);
162     fclose(f);
163     }
164 
165     rsc = uspoof_openFromSource(confusables, confusablesLength,
166                                               confusablesWholeScript, confusablesWholeScriptLength,
167                                               &errType, &pe, &status);
168     TEST_ASSERT_SUCCESS(status);
169 
170     free(confusablesWholeScript);
171     free(confusables);
172     free(fileName);
173     uspoof_close(rsc);
174     /*  printf("ParseError Line is %d\n", pe.line);  */
175     TEST_TEARDOWN;
176 
177 
178     /*
179      * openFromSerialized and serialize
180     */
181     TEST_SETUP
182         int32_t        serializedSize = 0;
183         int32_t        actualLength = 0;
184         char           *buf;
185         USpoofChecker  *sc2;
186         int32_t         checkResults;
187 
188 
189         serializedSize = uspoof_serialize(sc, NULL, 0, &status);
190         TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);
191         TEST_ASSERT(serializedSize > 0);
192 
193         /* Serialize the default spoof checker */
194         status = U_ZERO_ERROR;
195         buf = (char *)malloc(serializedSize + 10);
196         TEST_ASSERT(buf != NULL);
197         buf[serializedSize] = 42;
198         uspoof_serialize(sc, buf, serializedSize, &status);
199         TEST_ASSERT_SUCCESS(status);
200         TEST_ASSERT_EQ(42, buf[serializedSize]);
201 
202         /* Create a new spoof checker from the freshly serialized data */
203         sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);
204         TEST_ASSERT_SUCCESS(status);
205         TEST_ASSERT_NE(NULL, sc2);
206         TEST_ASSERT_EQ(serializedSize, actualLength);
207 
208         /* Verify that the new spoof checker at least wiggles */
209         checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);
210         TEST_ASSERT_SUCCESS(status);
211         TEST_ASSERT_EQ(0, checkResults);
212 
213         checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);
214         TEST_ASSERT_SUCCESS(status);
215         TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
216 
217         uspoof_close(sc2);
218         free(buf);
219     TEST_TEARDOWN;
220 
221 
222 
223     /*
224      * Set & Get Check Flags
225     */
226     TEST_SETUP
227         int32_t t;
228         uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);
229         TEST_ASSERT_SUCCESS(status);
230         t = uspoof_getChecks(sc, &status);
231         TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);
232 
233         uspoof_setChecks(sc, 0, &status);
234         TEST_ASSERT_SUCCESS(status);
235         t = uspoof_getChecks(sc, &status);
236         TEST_ASSERT_EQ(0, t);
237 
238         uspoof_setChecks(sc,
239                         USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
240                         &status);
241         TEST_ASSERT_SUCCESS(status);
242         t = uspoof_getChecks(sc, &status);
243         TEST_ASSERT_SUCCESS(status);
244         TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);
245     TEST_TEARDOWN;
246 
247     /*
248     * get & setAllowedChars
249     */
250     TEST_SETUP
251         USet *us;
252         const USet *uset;
253 
254         uset = uspoof_getAllowedChars(sc, &status);
255         TEST_ASSERT_SUCCESS(status);
256         TEST_ASSERT(uset_isFrozen(uset));
257         us = uset_open((UChar32)0x41, (UChar32)0x5A);   /*  [A-Z]  */
258         uspoof_setAllowedChars(sc, us, &status);
259         TEST_ASSERT_SUCCESS(status);
260         TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));
261         TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));
262         TEST_ASSERT_SUCCESS(status);
263         uset_close(us);
264     TEST_TEARDOWN;
265 
266     /*
267     *  clone()
268     */
269 
270     TEST_SETUP
271         USpoofChecker *clone1 = NULL;
272         USpoofChecker *clone2 = NULL;
273         int32_t        checkResults = 0;
274 
275         clone1 = uspoof_clone(sc, &status);
276         TEST_ASSERT_SUCCESS(status);
277         TEST_ASSERT_NE(clone1, sc);
278 
279         clone2 = uspoof_clone(clone1, &status);
280         TEST_ASSERT_SUCCESS(status);
281         TEST_ASSERT_NE(clone2, clone1);
282 
283         uspoof_close(clone1);
284 
285         /* Verify that the cloned spoof checker is alive */
286         checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);
287         TEST_ASSERT_SUCCESS(status);
288         TEST_ASSERT_EQ(0, checkResults);
289 
290         checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);
291         TEST_ASSERT_SUCCESS(status);
292         TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
293         uspoof_close(clone2);
294     TEST_TEARDOWN;
295 
296      /*
297      *  basic uspoof_check()
298      */
299      TEST_SETUP
300          int32_t result;
301          result = uspoof_check(sc, goodLatin, -1, NULL, &status);
302          TEST_ASSERT_SUCCESS(status);
303          TEST_ASSERT_EQ(0, result);
304 
305          result = uspoof_check(sc, han_Hiragana, -1, NULL, &status);
306          TEST_ASSERT_SUCCESS(status);
307          TEST_ASSERT_EQ(0, result);
308 
309          result = uspoof_check(sc, scMixed, -1, NULL, &status);
310          TEST_ASSERT_SUCCESS(status);
311          TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, result);
312      TEST_TEARDOWN
313 
314 
315     /*
316      *  get & set Checks
317     */
318     TEST_SETUP
319         int32_t   checks;
320         int32_t   checks2;
321         int32_t   checkResults;
322 
323         checks = uspoof_getChecks(sc, &status);
324         TEST_ASSERT_SUCCESS(status);
325         TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);
326 
327         checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);
328         uspoof_setChecks(sc, checks, &status);
329         TEST_ASSERT_SUCCESS(status);
330         checks2 = uspoof_getChecks(sc, &status);
331         TEST_ASSERT_EQ(checks, checks2);
332 
333         /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
334             So with those tests gone checking that Identifier should now succeed */
335         checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);
336         TEST_ASSERT_SUCCESS(status);
337         TEST_ASSERT_EQ(0, checkResults);
338     TEST_TEARDOWN;
339 
340     /*
341      *  AllowedLoacles
342      */
343 
344     TEST_SETUP
345         const char  *allowedLocales;
346         int32_t  checkResults;
347 
348         /* Default allowed locales list should be empty */
349         allowedLocales = uspoof_getAllowedLocales(sc, &status);
350         TEST_ASSERT_SUCCESS(status);
351         TEST_ASSERT(strcmp("", allowedLocales) == 0)
352 
353         /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
354         uspoof_setAllowedLocales(sc, "en, ru_RU", &status);
355         TEST_ASSERT_SUCCESS(status);
356         allowedLocales = uspoof_getAllowedLocales(sc, &status);
357         TEST_ASSERT_SUCCESS(status);
358         TEST_ASSERT(strstr(allowedLocales, "en") != NULL);
359         TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);
360 
361         /* Limit checks to USPOOF_CHAR_LIMIT.  Some of the test data has whole script confusables also,
362          * which we don't want to see in this test. */
363         uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);
364         TEST_ASSERT_SUCCESS(status);
365 
366         checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
367         TEST_ASSERT_SUCCESS(status);
368         TEST_ASSERT_EQ(0, checkResults);
369 
370         checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
371         TEST_ASSERT_SUCCESS(status);
372         TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);
373 
374         checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);
375         TEST_ASSERT_SUCCESS(status);
376         TEST_ASSERT_EQ(0, checkResults);
377 
378         /* Reset with an empty locale list, which should allow all characters to pass */
379         uspoof_setAllowedLocales(sc, " ", &status);
380         TEST_ASSERT_SUCCESS(status);
381 
382         checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
383         TEST_ASSERT_SUCCESS(status);
384         TEST_ASSERT_EQ(0, checkResults);
385     TEST_TEARDOWN;
386 
387     /*
388      * AllowedChars   set/get the USet of allowed characters.
389      */
390     TEST_SETUP
391         const USet  *set;
392         USet        *tmpSet;
393         int32_t      checkResults;
394 
395         /* By default, we should see no restriction; the USet should allow all characters. */
396         set = uspoof_getAllowedChars(sc, &status);
397         TEST_ASSERT_SUCCESS(status);
398         tmpSet = uset_open(0, 0x10ffff);
399         TEST_ASSERT(uset_equals(tmpSet, set));
400 
401         /* Setting the allowed chars should enable the check. */
402         uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);
403         TEST_ASSERT_SUCCESS(status);
404 
405         /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
406         uset_remove(tmpSet, goodLatin[1]);
407         uspoof_setAllowedChars(sc, tmpSet, &status);
408         TEST_ASSERT_SUCCESS(status);
409         uset_close(tmpSet);
410 
411         /* Latin Identifier should now fail; other non-latin test cases should still be OK */
412         checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
413         TEST_ASSERT_SUCCESS(status);
414         TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);
415 
416         checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
417         TEST_ASSERT_SUCCESS(status);
418         TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
419     TEST_TEARDOWN;
420 
421     /*
422      * check UTF-8
423      */
424     TEST_SETUP
425         char    utf8buf[200];
426         int32_t checkResults;
427         int32_t position;
428 
429         u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);
430         TEST_ASSERT_SUCCESS(status);
431         position = 666;
432         checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
433         TEST_ASSERT_SUCCESS(status);
434         TEST_ASSERT_EQ(0, checkResults);
435         TEST_ASSERT_EQ(666, position);
436 
437         u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);
438         TEST_ASSERT_SUCCESS(status);
439         checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
440         TEST_ASSERT_SUCCESS(status);
441         TEST_ASSERT_EQ(0, checkResults);
442 
443         u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);
444         TEST_ASSERT_SUCCESS(status);
445         position = 666;
446         checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
447         TEST_ASSERT_SUCCESS(status);
448         TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);
449         TEST_ASSERT_EQ(2, position);
450 
451     TEST_TEARDOWN;
452 
453     /*
454      * uspoof_areConfusable()
455      */
456     TEST_SETUP
457         int32_t  checkResults;
458 
459         checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);
460         TEST_ASSERT_SUCCESS(status);
461         TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
462 
463         checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);
464         TEST_ASSERT_SUCCESS(status);
465         TEST_ASSERT_EQ(0, checkResults);
466 
467         checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);
468         TEST_ASSERT_SUCCESS(status);
469         TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
470 
471     TEST_TEARDOWN;
472 
473     /*
474      * areConfusableUTF8
475      */
476     TEST_SETUP
477         int32_t checkResults;
478         char s1[200];
479         char s2[200];
480 
481 
482         u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
483         u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
484         TEST_ASSERT_SUCCESS(status);
485         checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
486         TEST_ASSERT_SUCCESS(status);
487         TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
488 
489         u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
490         u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
491         TEST_ASSERT_SUCCESS(status);
492         checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
493         TEST_ASSERT_SUCCESS(status);
494         TEST_ASSERT_EQ(0, checkResults);
495 
496         u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
497         u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
498         TEST_ASSERT_SUCCESS(status);
499         checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
500         TEST_ASSERT_SUCCESS(status);
501         TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
502 
503     TEST_TEARDOWN;
504 
505 
506   /*
507    * getSkeleton
508    */
509 
510     TEST_SETUP
511         UChar dest[100];
512         int32_t   skelLength;
513 
514         skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status);
515         TEST_ASSERT_SUCCESS(status);
516         TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
517         TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);
518 
519         skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest,
520                                             sizeof(dest)/sizeof(UChar), &status);
521         TEST_ASSERT_SUCCESS(status);
522 
523         skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);
524         TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
525         TEST_ASSERT_EQ(3, skelLength);
526         status = U_ZERO_ERROR;
527 
528     TEST_TEARDOWN;
529 }
530 
531 #endif  /* UCONFIG_NO_REGULAR_EXPRESSIONS */
532