• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2008-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 *   file name:  uspoof.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2008Feb13
14 *   created by: Andy Heninger
15 *
16 *   Unicode Spoof Detection
17 */
18 #include "unicode/ubidi.h"
19 #include "unicode/utypes.h"
20 #include "unicode/normalizer2.h"
21 #include "unicode/uspoof.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utf16.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "mutex.h"
27 #include "scriptset.h"
28 #include "uassert.h"
29 #include "ucln_in.h"
30 #include "uspoof_impl.h"
31 #include "umutex.h"
32 
33 
34 #if !UCONFIG_NO_NORMALIZATION
35 
36 U_NAMESPACE_USE
37 
38 
39 //
40 // Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
41 //
42 static UnicodeSet *gInclusionSet = nullptr;
43 static UnicodeSet *gRecommendedSet = nullptr;
44 static const Normalizer2 *gNfdNormalizer = nullptr;
45 static UInitOnce gSpoofInitStaticsOnce {};
46 
47 namespace {
48 
49 UBool U_CALLCONV
uspoof_cleanup()50 uspoof_cleanup() {
51     delete gInclusionSet;
52     gInclusionSet = nullptr;
53     delete gRecommendedSet;
54     gRecommendedSet = nullptr;
55     gNfdNormalizer = nullptr;
56     gSpoofInitStaticsOnce.reset();
57     return true;
58 }
59 
initializeStatics(UErrorCode & status)60 void U_CALLCONV initializeStatics(UErrorCode &status) {
61     gInclusionSet = new UnicodeSet();
62     gRecommendedSet = new UnicodeSet();
63     if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
64         status = U_MEMORY_ALLOCATION_ERROR;
65         delete gInclusionSet;
66         gInclusionSet = nullptr;
67         delete gRecommendedSet;
68         gRecommendedSet = nullptr;
69         return;
70     }
71     gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
72     gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
73     if (U_FAILURE(status)) {
74         delete gInclusionSet;
75         gInclusionSet = nullptr;
76         delete gRecommendedSet;
77         gRecommendedSet = nullptr;
78         return;
79     }
80     gInclusionSet->freeze();
81     gRecommendedSet->freeze();
82     gNfdNormalizer = Normalizer2::getNFDInstance(status);
83     ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
84 }
85 
86 }  // namespace
87 
uspoof_internalInitStatics(UErrorCode * status)88 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
89     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
90 }
91 
92 U_CAPI USpoofChecker * U_EXPORT2
uspoof_open(UErrorCode * status)93 uspoof_open(UErrorCode *status) {
94     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
95     if (U_FAILURE(*status)) {
96         return nullptr;
97     }
98     SpoofImpl *si = new SpoofImpl(*status);
99     if (si == nullptr) {
100         *status = U_MEMORY_ALLOCATION_ERROR;
101         return nullptr;
102     }
103     if (U_FAILURE(*status)) {
104         delete si;
105         return nullptr;
106     }
107     return si->asUSpoofChecker();
108 }
109 
110 
111 U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSerialized(const void * data,int32_t length,int32_t * pActualLength,UErrorCode * status)112 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
113                           UErrorCode *status) {
114     if (U_FAILURE(*status)) {
115         return nullptr;
116     }
117 
118     if (data == nullptr) {
119         *status = U_ILLEGAL_ARGUMENT_ERROR;
120         return nullptr;
121     }
122 
123     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
124     if (U_FAILURE(*status))
125     {
126         return nullptr;
127     }
128 
129     SpoofData *sd = new SpoofData(data, length, *status);
130     if (sd == nullptr) {
131         *status = U_MEMORY_ALLOCATION_ERROR;
132         return nullptr;
133     }
134 
135     if (U_FAILURE(*status)) {
136         delete sd;
137         return nullptr;
138     }
139 
140     SpoofImpl *si = new SpoofImpl(sd, *status);
141     if (si == nullptr) {
142         *status = U_MEMORY_ALLOCATION_ERROR;
143         delete sd; // explicit delete as the destructor for si won't be called.
144         return nullptr;
145     }
146 
147     if (U_FAILURE(*status)) {
148         delete si; // no delete for sd, as the si destructor will delete it.
149         return nullptr;
150     }
151 
152     if (pActualLength != nullptr) {
153         *pActualLength = sd->size();
154     }
155     return si->asUSpoofChecker();
156 }
157 
158 
159 U_CAPI USpoofChecker * U_EXPORT2
uspoof_clone(const USpoofChecker * sc,UErrorCode * status)160 uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
161     const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
162     if (src == nullptr) {
163         return nullptr;
164     }
165     SpoofImpl *result = new SpoofImpl(*src, *status);   // copy constructor
166     if (result == nullptr) {
167         *status = U_MEMORY_ALLOCATION_ERROR;
168         return nullptr;
169     }
170     if (U_FAILURE(*status)) {
171         delete result;
172         result = nullptr;
173     }
174     return result->asUSpoofChecker();
175 }
176 
177 
178 U_CAPI void U_EXPORT2
uspoof_close(USpoofChecker * sc)179 uspoof_close(USpoofChecker *sc) {
180     UErrorCode status = U_ZERO_ERROR;
181     SpoofImpl *This = SpoofImpl::validateThis(sc, status);
182     delete This;
183 }
184 
185 
186 U_CAPI void U_EXPORT2
uspoof_setChecks(USpoofChecker * sc,int32_t checks,UErrorCode * status)187 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
188     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
189     if (This == nullptr) {
190         return;
191     }
192 
193     // Verify that the requested checks are all ones (bits) that
194     //   are acceptable, known values.
195     if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
196         *status = U_ILLEGAL_ARGUMENT_ERROR;
197         return;
198     }
199 
200     This->fChecks = checks;
201 }
202 
203 
204 U_CAPI int32_t U_EXPORT2
uspoof_getChecks(const USpoofChecker * sc,UErrorCode * status)205 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
206     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
207     if (This == nullptr) {
208         return 0;
209     }
210     return This->fChecks;
211 }
212 
213 U_CAPI void U_EXPORT2
uspoof_setRestrictionLevel(USpoofChecker * sc,URestrictionLevel restrictionLevel)214 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
215     UErrorCode status = U_ZERO_ERROR;
216     SpoofImpl *This = SpoofImpl::validateThis(sc, status);
217     if (This != nullptr) {
218         This->fRestrictionLevel = restrictionLevel;
219         This->fChecks |= USPOOF_RESTRICTION_LEVEL;
220     }
221 }
222 
223 U_CAPI URestrictionLevel U_EXPORT2
uspoof_getRestrictionLevel(const USpoofChecker * sc)224 uspoof_getRestrictionLevel(const USpoofChecker *sc) {
225     UErrorCode status = U_ZERO_ERROR;
226     const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
227     if (This == nullptr) {
228         return USPOOF_UNRESTRICTIVE;
229     }
230     return This->fRestrictionLevel;
231 }
232 
233 U_CAPI void U_EXPORT2
uspoof_setAllowedLocales(USpoofChecker * sc,const char * localesList,UErrorCode * status)234 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
235     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
236     if (This == nullptr) {
237         return;
238     }
239     This->setAllowedLocales(localesList, *status);
240 }
241 
242 U_CAPI const char * U_EXPORT2
uspoof_getAllowedLocales(USpoofChecker * sc,UErrorCode * status)243 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
244     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
245     if (This == nullptr) {
246         return nullptr;
247     }
248     return This->getAllowedLocales(*status);
249 }
250 
251 
252 U_CAPI const USet * U_EXPORT2
uspoof_getAllowedChars(const USpoofChecker * sc,UErrorCode * status)253 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
254     const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
255     return result->toUSet();
256 }
257 
258 U_CAPI const UnicodeSet * U_EXPORT2
uspoof_getAllowedUnicodeSet(const USpoofChecker * sc,UErrorCode * status)259 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
260     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
261     if (This == nullptr) {
262         return nullptr;
263     }
264     return This->fAllowedCharsSet;
265 }
266 
267 
268 U_CAPI void U_EXPORT2
uspoof_setAllowedChars(USpoofChecker * sc,const USet * chars,UErrorCode * status)269 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
270     const UnicodeSet *set = UnicodeSet::fromUSet(chars);
271     uspoof_setAllowedUnicodeSet(sc, set, status);
272 }
273 
274 
275 U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker * sc,const UnicodeSet * chars,UErrorCode * status)276 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
277     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
278     if (This == nullptr) {
279         return;
280     }
281     if (chars->isBogus()) {
282         *status = U_ILLEGAL_ARGUMENT_ERROR;
283         return;
284     }
285     UnicodeSet *clonedSet = chars->clone();
286     if (clonedSet == nullptr || clonedSet->isBogus()) {
287         *status = U_MEMORY_ALLOCATION_ERROR;
288         return;
289     }
290     clonedSet->freeze();
291     delete This->fAllowedCharsSet;
292     This->fAllowedCharsSet = clonedSet;
293     This->fChecks |= USPOOF_CHAR_LIMIT;
294 }
295 
296 
297 U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker * sc,const char16_t * id,int32_t length,int32_t * position,UErrorCode * status)298 uspoof_check(const USpoofChecker *sc,
299              const char16_t *id, int32_t length,
300              int32_t *position,
301              UErrorCode *status) {
302 
303     // Backwards compatibility:
304     if (position != nullptr) {
305         *position = 0;
306     }
307 
308     // Delegate to uspoof_check2
309     return uspoof_check2(sc, id, length, nullptr, status);
310 }
311 
312 
313 U_CAPI int32_t U_EXPORT2
uspoof_check2(const USpoofChecker * sc,const char16_t * id,int32_t length,USpoofCheckResult * checkResult,UErrorCode * status)314 uspoof_check2(const USpoofChecker *sc,
315     const char16_t* id, int32_t length,
316     USpoofCheckResult* checkResult,
317     UErrorCode *status) {
318 
319     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
320     if (This == nullptr) {
321         return 0;
322     }
323     if (length < -1) {
324         *status = U_ILLEGAL_ARGUMENT_ERROR;
325         return 0;
326     }
327     UnicodeString idStr((length == -1), id, length);  // Aliasing constructor.
328     int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
329     return result;
330 }
331 
332 
333 U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker * sc,const char * id,int32_t length,int32_t * position,UErrorCode * status)334 uspoof_checkUTF8(const USpoofChecker *sc,
335                  const char *id, int32_t length,
336                  int32_t *position,
337                  UErrorCode *status) {
338 
339     // Backwards compatibility:
340     if (position != nullptr) {
341         *position = 0;
342     }
343 
344     // Delegate to uspoof_check2
345     return uspoof_check2UTF8(sc, id, length, nullptr, status);
346 }
347 
348 
349 U_CAPI int32_t U_EXPORT2
uspoof_check2UTF8(const USpoofChecker * sc,const char * id,int32_t length,USpoofCheckResult * checkResult,UErrorCode * status)350 uspoof_check2UTF8(const USpoofChecker *sc,
351     const char *id, int32_t length,
352     USpoofCheckResult* checkResult,
353     UErrorCode *status) {
354 
355     if (U_FAILURE(*status)) {
356         return 0;
357     }
358     UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
359     int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
360     return result;
361 }
362 
363 
364 U_CAPI int32_t U_EXPORT2
uspoof_areConfusable(const USpoofChecker * sc,const char16_t * id1,int32_t length1,const char16_t * id2,int32_t length2,UErrorCode * status)365 uspoof_areConfusable(const USpoofChecker *sc,
366                      const char16_t *id1, int32_t length1,
367                      const char16_t *id2, int32_t length2,
368                      UErrorCode *status) {
369     SpoofImpl::validateThis(sc, *status);
370     if (U_FAILURE(*status)) {
371         return 0;
372     }
373     if (length1 < -1 || length2 < -1) {
374         *status = U_ILLEGAL_ARGUMENT_ERROR;
375         return 0;
376     }
377 
378     UnicodeString id1Str((length1==-1), id1, length1);  // Aliasing constructor
379     UnicodeString id2Str((length2==-1), id2, length2);  // Aliasing constructor
380     return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
381 }
382 
383 
384 U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUTF8(const USpoofChecker * sc,const char * id1,int32_t length1,const char * id2,int32_t length2,UErrorCode * status)385 uspoof_areConfusableUTF8(const USpoofChecker *sc,
386                          const char *id1, int32_t length1,
387                          const char *id2, int32_t length2,
388                          UErrorCode *status) {
389     SpoofImpl::validateThis(sc, *status);
390     if (U_FAILURE(*status)) {
391         return 0;
392     }
393     if (length1 < -1 || length2 < -1) {
394         *status = U_ILLEGAL_ARGUMENT_ERROR;
395         return 0;
396     }
397     UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1))));
398     UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2))));
399     int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
400     return results;
401 }
402 
403 
404 U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id1,const icu::UnicodeString & id2,UErrorCode * status)405 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
406                                   const icu::UnicodeString &id1,
407                                   const icu::UnicodeString &id2,
408                                   UErrorCode *status) {
409     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
410     if (U_FAILURE(*status)) {
411         return 0;
412     }
413     //
414     // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
415     //   and for definitions of the types (single, whole, mixed-script) of confusables.
416 
417     // We only care about a few of the check flags.  Ignore the others.
418     // If no tests relevant to this function have been specified, return an error.
419     // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
420     //        but logically we would just return 0 (no error).
421     if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
422         *status = U_INVALID_STATE_ERROR;
423         return 0;
424     }
425 
426     // Compute the skeletons and check for confusability.
427     UnicodeString id1Skeleton;
428     uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
429     UnicodeString id2Skeleton;
430     uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
431     if (U_FAILURE(*status)) { return 0; }
432     if (id1Skeleton != id2Skeleton) {
433         return 0;
434     }
435 
436     // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate classes
437     // of confusables according to UTS 39 section 4.
438     // Start by computing the resolved script sets of id1 and id2.
439     ScriptSet id1RSS;
440     This->getResolvedScriptSet(id1, id1RSS, *status);
441     ScriptSet id2RSS;
442     This->getResolvedScriptSet(id2, id2RSS, *status);
443 
444     // Turn on all applicable flags
445     int32_t result = 0;
446     if (id1RSS.intersects(id2RSS)) {
447         result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
448     } else {
449         result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
450         if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
451             result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
452         }
453     }
454 
455     // Turn off flags that the user doesn't want
456     if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
457         result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
458     }
459     if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
460         result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
461     }
462     if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
463         result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
464     }
465 
466     return result;
467 }
468 
uspoof_areBidiConfusable(const USpoofChecker * sc,UBiDiDirection direction,const char16_t * id1,int32_t length1,const char16_t * id2,int32_t length2,UErrorCode * status)469 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
470                                                   const char16_t *id1, int32_t length1,
471                                                   const char16_t *id2, int32_t length2,
472                                                    UErrorCode *status) {
473     UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
474     UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
475     if (id1Str.isBogus() || id2Str.isBogus()) {
476         *status = U_ILLEGAL_ARGUMENT_ERROR;
477         return 0;
478     }
479     return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
480 }
481 
uspoof_areBidiConfusableUTF8(const USpoofChecker * sc,UBiDiDirection direction,const char * id1,int32_t length1,const char * id2,int32_t length2,UErrorCode * status)482 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
483                                                       const char *id1, int32_t length1, const char *id2,
484                                                       int32_t length2, UErrorCode *status) {
485     if (length1 < -1 || length2 < -1) {
486         *status = U_ILLEGAL_ARGUMENT_ERROR;
487         return 0;
488     }
489     UnicodeString id1Str = UnicodeString::fromUTF8(
490         StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
491     UnicodeString id2Str = UnicodeString::fromUTF8(
492         StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
493     return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
494 }
495 
uspoof_areBidiConfusableUnicodeString(const USpoofChecker * sc,UBiDiDirection direction,const icu::UnicodeString & id1,const icu::UnicodeString & id2,UErrorCode * status)496 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
497                                                                UBiDiDirection direction,
498                                                                const icu::UnicodeString &id1,
499                                                                const icu::UnicodeString &id2,
500                                                                UErrorCode *status) {
501     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
502     if (U_FAILURE(*status)) {
503         return 0;
504     }
505     //
506     // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
507     //   and for definitions of the types (single, whole, mixed-script) of confusables.
508 
509     // We only care about a few of the check flags.  Ignore the others.
510     // If no tests relevant to this function have been specified, return an error.
511     // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
512     //        but logically we would just return 0 (no error).
513     if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
514         *status = U_INVALID_STATE_ERROR;
515         return 0;
516     }
517 
518     // Compute the skeletons and check for confusability.
519     UnicodeString id1Skeleton;
520     uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
521     UnicodeString id2Skeleton;
522     uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
523     if (U_FAILURE(*status)) {
524         return 0;
525     }
526     if (id1Skeleton != id2Skeleton) {
527         return 0;
528     }
529 
530     // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate
531     // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
532     // of id1 and id2.
533     ScriptSet id1RSS;
534     This->getResolvedScriptSet(id1, id1RSS, *status);
535     ScriptSet id2RSS;
536     This->getResolvedScriptSet(id2, id2RSS, *status);
537 
538     // Turn on all applicable flags
539     uint32_t result = 0;
540     if (id1RSS.intersects(id2RSS)) {
541         result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
542     } else {
543         result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
544         if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
545             result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
546         }
547     }
548 
549     // Turn off flags that the user doesn't want
550     return result & This->fChecks;
551 }
552 
553 
554 U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id,int32_t * position,UErrorCode * status)555 uspoof_checkUnicodeString(const USpoofChecker *sc,
556                           const icu::UnicodeString &id,
557                           int32_t *position,
558                           UErrorCode *status) {
559 
560     // Backwards compatibility:
561     if (position != nullptr) {
562         *position = 0;
563     }
564 
565     // Delegate to uspoof_check2
566     return uspoof_check2UnicodeString(sc, id, nullptr, status);
567 }
568 
569 namespace {
570 
checkImpl(const SpoofImpl * This,const UnicodeString & id,CheckResult * checkResult,UErrorCode * status)571 int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
572     U_ASSERT(This != nullptr);
573     U_ASSERT(checkResult != nullptr);
574     checkResult->clear();
575     int32_t result = 0;
576 
577     if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
578         URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
579         if (idRestrictionLevel > This->fRestrictionLevel) {
580             result |= USPOOF_RESTRICTION_LEVEL;
581         }
582         checkResult->fRestrictionLevel = idRestrictionLevel;
583     }
584 
585     if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
586         UnicodeSet numerics;
587         This->getNumerics(id, numerics, *status);
588         if (numerics.size() > 1) {
589             result |= USPOOF_MIXED_NUMBERS;
590         }
591         checkResult->fNumerics = numerics;  // UnicodeSet::operator=
592     }
593 
594     if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
595         int32_t index = This->findHiddenOverlay(id, *status);
596         if (index != -1) {
597             result |= USPOOF_HIDDEN_OVERLAY;
598         }
599     }
600 
601 
602     if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
603         int32_t i;
604         UChar32 c;
605         int32_t length = id.length();
606         for (i=0; i<length ;) {
607             c = id.char32At(i);
608             i += U16_LENGTH(c);
609             if (!This->fAllowedCharsSet->contains(c)) {
610                 result |= USPOOF_CHAR_LIMIT;
611                 break;
612             }
613         }
614     }
615 
616     if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
617         // This check needs to be done on NFD input
618         UnicodeString nfdText;
619         gNfdNormalizer->normalize(id, nfdText, *status);
620         int32_t nfdLength = nfdText.length();
621 
622         // scan for more than one occurrence of the same non-spacing mark
623         // in a sequence of non-spacing marks.
624         int32_t     i;
625         UChar32     c;
626         UChar32     firstNonspacingMark = 0;
627         UBool       haveMultipleMarks = false;
628         UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
629 
630         for (i=0; i<nfdLength ;) {
631             c = nfdText.char32At(i);
632             i += U16_LENGTH(c);
633             if (u_charType(c) != U_NON_SPACING_MARK) {
634                 firstNonspacingMark = 0;
635                 if (haveMultipleMarks) {
636                     marksSeenSoFar.clear();
637                     haveMultipleMarks = false;
638                 }
639                 continue;
640             }
641             if (firstNonspacingMark == 0) {
642                 firstNonspacingMark = c;
643                 continue;
644             }
645             if (!haveMultipleMarks) {
646                 marksSeenSoFar.add(firstNonspacingMark);
647                 haveMultipleMarks = true;
648             }
649             if (marksSeenSoFar.contains(c)) {
650                 // report the error, and stop scanning.
651                 // No need to find more than the first failure.
652                 result |= USPOOF_INVISIBLE;
653                 break;
654             }
655             marksSeenSoFar.add(c);
656         }
657     }
658 
659     checkResult->fChecks = result;
660     return checkResult->toCombinedBitmask(This->fChecks);
661 }
662 
663 }  // namespace
664 
665 U_CAPI int32_t U_EXPORT2
uspoof_check2UnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id,USpoofCheckResult * checkResult,UErrorCode * status)666 uspoof_check2UnicodeString(const USpoofChecker *sc,
667                           const icu::UnicodeString &id,
668                           USpoofCheckResult* checkResult,
669                           UErrorCode *status) {
670     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
671     if (This == nullptr) {
672         return false;
673     }
674 
675     if (checkResult != nullptr) {
676         CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
677         if (ThisCheckResult == nullptr) {
678             return false;
679         }
680         return checkImpl(This, id, ThisCheckResult, status);
681     } else {
682         // Stack-allocate the checkResult since this method doesn't return it
683         CheckResult stackCheckResult;
684         return checkImpl(This, id, &stackCheckResult, status);
685     }
686 }
687 
688 
689 U_CAPI int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker * sc,uint32_t type,const char16_t * id,int32_t length,char16_t * dest,int32_t destCapacity,UErrorCode * status)690 uspoof_getSkeleton(const USpoofChecker *sc,
691                    uint32_t type,
692                    const char16_t *id,  int32_t length,
693                    char16_t *dest, int32_t destCapacity,
694                    UErrorCode *status) {
695 
696     SpoofImpl::validateThis(sc, *status);
697     if (U_FAILURE(*status)) {
698         return 0;
699     }
700     if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
701         *status = U_ILLEGAL_ARGUMENT_ERROR;
702         return 0;
703     }
704 
705     UnicodeString idStr((length==-1), id, length);  // Aliasing constructor
706     UnicodeString destStr;
707     uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
708     destStr.extract(dest, destCapacity, *status);
709     return destStr.length();
710 }
711 
uspoof_getBidiSkeleton(const USpoofChecker * sc,UBiDiDirection direction,const UChar * id,int32_t length,UChar * dest,int32_t destCapacity,UErrorCode * status)712 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
713                                                 const UChar *id, int32_t length, UChar *dest,
714                                                 int32_t destCapacity, UErrorCode *status) {
715     UnicodeString idStr((length == -1), id, length); // Aliasing constructor
716     if (idStr.isBogus()) {
717         *status = U_ILLEGAL_ARGUMENT_ERROR;
718         return 0;
719     }
720     UnicodeString destStr;
721     uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status);
722     return destStr.extract(dest, destCapacity, *status);
723 }
724 
725 
726 
uspoof_getBidiSkeletonUnicodeString(const USpoofChecker * sc,UBiDiDirection direction,const UnicodeString & id,UnicodeString & dest,UErrorCode * status)727 U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
728                                                                         UBiDiDirection direction,
729                                                                         const UnicodeString &id,
730                                                                         UnicodeString &dest,
731                                                                         UErrorCode *status) {
732     dest.remove();
733     if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
734       *status = U_ILLEGAL_ARGUMENT_ERROR;
735       return dest;
736     }
737     UBiDi *bidi = ubidi_open();
738     ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
739                   /*embeddingLevels*/ nullptr, status);
740     if (U_FAILURE(*status)) {
741         ubidi_close(bidi);
742         return dest;
743     }
744     UnicodeString reordered;
745     int32_t const size = ubidi_getProcessedLength(bidi);
746     UChar* const reorderedBuffer = reordered.getBuffer(size);
747     if (reorderedBuffer == nullptr) {
748         *status = U_MEMORY_ALLOCATION_ERROR;
749         ubidi_close(bidi);
750         return dest;
751     }
752     ubidi_writeReordered(bidi, reorderedBuffer, size,
753                          UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
754     reordered.releaseBuffer(size);
755     ubidi_close(bidi);
756 
757     if (U_FAILURE(*status)) {
758         return dest;
759     }
760 
761     // The type parameter is deprecated since ICU 58; any number may be passed.
762     constexpr uint32_t deprecatedType = 58;
763     return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
764 }
765 
766 
767 
768 U_I18N_API UnicodeString &  U_EXPORT2
uspoof_getSkeletonUnicodeString(const USpoofChecker * sc,uint32_t,const UnicodeString & id,UnicodeString & dest,UErrorCode * status)769 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
770                                 uint32_t /*type*/,
771                                 const UnicodeString &id,
772                                 UnicodeString &dest,
773                                 UErrorCode *status) {
774     const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
775     if (U_FAILURE(*status)) {
776         return dest;
777     }
778 
779     UnicodeString nfdId;
780     gNfdNormalizer->normalize(id, nfdId, *status);
781 
782     // Apply the skeleton mapping to the NFD normalized input string
783     // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
784     int32_t inputIndex = 0;
785     UnicodeString skelStr;
786     int32_t normalizedLen = nfdId.length();
787     for (inputIndex=0; inputIndex < normalizedLen; ) {
788         UChar32 c = nfdId.char32At(inputIndex);
789         inputIndex += U16_LENGTH(c);
790         if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
791             This->fSpoofData->confusableLookup(c, skelStr);
792         }
793     }
794 
795     gNfdNormalizer->normalize(skelStr, dest, *status);
796     return dest;
797 }
798 
uspoof_getSkeletonUTF8(const USpoofChecker * sc,uint32_t type,const char * id,int32_t length,char * dest,int32_t destCapacity,UErrorCode * status)799 U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
800                                                 int32_t length, char *dest, int32_t destCapacity,
801                        UErrorCode *status) {
802     SpoofImpl::validateThis(sc, *status);
803     if (U_FAILURE(*status)) {
804         return 0;
805     }
806     if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
807         *status = U_ILLEGAL_ARGUMENT_ERROR;
808         return 0;
809     }
810 
811     UnicodeString srcStr = UnicodeString::fromUTF8(
812         StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
813     UnicodeString destStr;
814     uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
815     if (U_FAILURE(*status)) {
816         return 0;
817     }
818 
819     int32_t lengthInUTF8 = 0;
820     u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
821     return lengthInUTF8;
822 }
823 
uspoof_getBidiSkeletonUTF8(const USpoofChecker * sc,UBiDiDirection direction,const char * id,int32_t length,char * dest,int32_t destCapacity,UErrorCode * status)824 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
825                                                     const char *id, int32_t length, char *dest,
826                                                     int32_t destCapacity, UErrorCode *status) {
827     if (length < -1) {
828         *status = U_ILLEGAL_ARGUMENT_ERROR;
829         return 0;
830     }
831 
832     UnicodeString srcStr = UnicodeString::fromUTF8(
833         StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
834     UnicodeString destStr;
835     uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status);
836     if (U_FAILURE(*status)) {
837         return 0;
838     }
839 
840     int32_t lengthInUTF8 = 0;
841     u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
842     return lengthInUTF8;
843 }
844 
845 
846 U_CAPI int32_t U_EXPORT2
uspoof_serialize(USpoofChecker * sc,void * buf,int32_t capacity,UErrorCode * status)847 uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
848     SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
849     if (This == nullptr) {
850         U_ASSERT(U_FAILURE(*status));
851         return 0;
852     }
853 
854     return This->fSpoofData->serialize(buf, capacity, *status);
855 }
856 
857 U_CAPI const USet * U_EXPORT2
uspoof_getInclusionSet(UErrorCode * status)858 uspoof_getInclusionSet(UErrorCode *status) {
859     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
860     return gInclusionSet->toUSet();
861 }
862 
863 U_CAPI const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode * status)864 uspoof_getRecommendedSet(UErrorCode *status) {
865     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
866     return gRecommendedSet->toUSet();
867 }
868 
869 U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getInclusionUnicodeSet(UErrorCode * status)870 uspoof_getInclusionUnicodeSet(UErrorCode *status) {
871     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
872     return gInclusionSet;
873 }
874 
875 U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getRecommendedUnicodeSet(UErrorCode * status)876 uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
877     umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
878     return gRecommendedSet;
879 }
880 
881 //------------------
882 // CheckResult APIs
883 //------------------
884 
885 U_CAPI USpoofCheckResult* U_EXPORT2
uspoof_openCheckResult(UErrorCode * status)886 uspoof_openCheckResult(UErrorCode *status) {
887     CheckResult* checkResult = new CheckResult();
888     if (checkResult == nullptr) {
889         *status = U_MEMORY_ALLOCATION_ERROR;
890         return nullptr;
891     }
892     return checkResult->asUSpoofCheckResult();
893 }
894 
895 U_CAPI void U_EXPORT2
uspoof_closeCheckResult(USpoofCheckResult * checkResult)896 uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
897     UErrorCode status = U_ZERO_ERROR;
898     CheckResult* This = CheckResult::validateThis(checkResult, status);
899     delete This;
900 }
901 
902 U_CAPI int32_t U_EXPORT2
uspoof_getCheckResultChecks(const USpoofCheckResult * checkResult,UErrorCode * status)903 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
904     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
905     if (U_FAILURE(*status)) { return 0; }
906     return This->fChecks;
907 }
908 
909 U_CAPI URestrictionLevel U_EXPORT2
uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult * checkResult,UErrorCode * status)910 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
911     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
912     if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
913     return This->fRestrictionLevel;
914 }
915 
916 U_CAPI const USet* U_EXPORT2
uspoof_getCheckResultNumerics(const USpoofCheckResult * checkResult,UErrorCode * status)917 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
918     const CheckResult* This = CheckResult::validateThis(checkResult, *status);
919     if (U_FAILURE(*status)) { return nullptr; }
920     return This->fNumerics.toUSet();
921 }
922 
923 
924 
925 #endif // !UCONFIG_NO_NORMALIZATION
926