1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2008-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * file name: uspoof.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008Feb13
14 * created by: Andy Heninger
15 *
16 * Unicode Spoof Detection
17 */
18 #include "unicode/ubidi.h"
19 #include "unicode/utypes.h"
20 #include "unicode/normalizer2.h"
21 #include "unicode/uspoof.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utf16.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "mutex.h"
27 #include "scriptset.h"
28 #include "uassert.h"
29 #include "ucln_in.h"
30 #include "uspoof_impl.h"
31 #include "umutex.h"
32
33
34 #if !UCONFIG_NO_NORMALIZATION
35
36 U_NAMESPACE_USE
37
38
39 //
40 // Static Objects used by the spoof impl, their thread safe initialization and their cleanup.
41 //
42 static UnicodeSet *gInclusionSet = nullptr;
43 static UnicodeSet *gRecommendedSet = nullptr;
44 static const Normalizer2 *gNfdNormalizer = nullptr;
45 static UInitOnce gSpoofInitStaticsOnce {};
46
47 namespace {
48
49 UBool U_CALLCONV
uspoof_cleanup()50 uspoof_cleanup() {
51 delete gInclusionSet;
52 gInclusionSet = nullptr;
53 delete gRecommendedSet;
54 gRecommendedSet = nullptr;
55 gNfdNormalizer = nullptr;
56 gSpoofInitStaticsOnce.reset();
57 return true;
58 }
59
initializeStatics(UErrorCode & status)60 void U_CALLCONV initializeStatics(UErrorCode &status) {
61 gInclusionSet = new UnicodeSet();
62 gRecommendedSet = new UnicodeSet();
63 if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
64 status = U_MEMORY_ALLOCATION_ERROR;
65 delete gInclusionSet;
66 gInclusionSet = nullptr;
67 delete gRecommendedSet;
68 gRecommendedSet = nullptr;
69 return;
70 }
71 gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
72 gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
73 if (U_FAILURE(status)) {
74 delete gInclusionSet;
75 gInclusionSet = nullptr;
76 delete gRecommendedSet;
77 gRecommendedSet = nullptr;
78 return;
79 }
80 gInclusionSet->freeze();
81 gRecommendedSet->freeze();
82 gNfdNormalizer = Normalizer2::getNFDInstance(status);
83 ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
84 }
85
86 } // namespace
87
uspoof_internalInitStatics(UErrorCode * status)88 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
89 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
90 }
91
92 U_CAPI USpoofChecker * U_EXPORT2
uspoof_open(UErrorCode * status)93 uspoof_open(UErrorCode *status) {
94 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
95 if (U_FAILURE(*status)) {
96 return nullptr;
97 }
98 SpoofImpl *si = new SpoofImpl(*status);
99 if (si == nullptr) {
100 *status = U_MEMORY_ALLOCATION_ERROR;
101 return nullptr;
102 }
103 if (U_FAILURE(*status)) {
104 delete si;
105 return nullptr;
106 }
107 return si->asUSpoofChecker();
108 }
109
110
111 U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSerialized(const void * data,int32_t length,int32_t * pActualLength,UErrorCode * status)112 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
113 UErrorCode *status) {
114 if (U_FAILURE(*status)) {
115 return nullptr;
116 }
117
118 if (data == nullptr) {
119 *status = U_ILLEGAL_ARGUMENT_ERROR;
120 return nullptr;
121 }
122
123 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
124 if (U_FAILURE(*status))
125 {
126 return nullptr;
127 }
128
129 SpoofData *sd = new SpoofData(data, length, *status);
130 if (sd == nullptr) {
131 *status = U_MEMORY_ALLOCATION_ERROR;
132 return nullptr;
133 }
134
135 if (U_FAILURE(*status)) {
136 delete sd;
137 return nullptr;
138 }
139
140 SpoofImpl *si = new SpoofImpl(sd, *status);
141 if (si == nullptr) {
142 *status = U_MEMORY_ALLOCATION_ERROR;
143 delete sd; // explicit delete as the destructor for si won't be called.
144 return nullptr;
145 }
146
147 if (U_FAILURE(*status)) {
148 delete si; // no delete for sd, as the si destructor will delete it.
149 return nullptr;
150 }
151
152 if (pActualLength != nullptr) {
153 *pActualLength = sd->size();
154 }
155 return si->asUSpoofChecker();
156 }
157
158
159 U_CAPI USpoofChecker * U_EXPORT2
uspoof_clone(const USpoofChecker * sc,UErrorCode * status)160 uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
161 const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
162 if (src == nullptr) {
163 return nullptr;
164 }
165 SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor
166 if (result == nullptr) {
167 *status = U_MEMORY_ALLOCATION_ERROR;
168 return nullptr;
169 }
170 if (U_FAILURE(*status)) {
171 delete result;
172 result = nullptr;
173 }
174 return result->asUSpoofChecker();
175 }
176
177
178 U_CAPI void U_EXPORT2
uspoof_close(USpoofChecker * sc)179 uspoof_close(USpoofChecker *sc) {
180 UErrorCode status = U_ZERO_ERROR;
181 SpoofImpl *This = SpoofImpl::validateThis(sc, status);
182 delete This;
183 }
184
185
186 U_CAPI void U_EXPORT2
uspoof_setChecks(USpoofChecker * sc,int32_t checks,UErrorCode * status)187 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
188 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
189 if (This == nullptr) {
190 return;
191 }
192
193 // Verify that the requested checks are all ones (bits) that
194 // are acceptable, known values.
195 if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
196 *status = U_ILLEGAL_ARGUMENT_ERROR;
197 return;
198 }
199
200 This->fChecks = checks;
201 }
202
203
204 U_CAPI int32_t U_EXPORT2
uspoof_getChecks(const USpoofChecker * sc,UErrorCode * status)205 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
206 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
207 if (This == nullptr) {
208 return 0;
209 }
210 return This->fChecks;
211 }
212
213 U_CAPI void U_EXPORT2
uspoof_setRestrictionLevel(USpoofChecker * sc,URestrictionLevel restrictionLevel)214 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
215 UErrorCode status = U_ZERO_ERROR;
216 SpoofImpl *This = SpoofImpl::validateThis(sc, status);
217 if (This != nullptr) {
218 This->fRestrictionLevel = restrictionLevel;
219 This->fChecks |= USPOOF_RESTRICTION_LEVEL;
220 }
221 }
222
223 U_CAPI URestrictionLevel U_EXPORT2
uspoof_getRestrictionLevel(const USpoofChecker * sc)224 uspoof_getRestrictionLevel(const USpoofChecker *sc) {
225 UErrorCode status = U_ZERO_ERROR;
226 const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
227 if (This == nullptr) {
228 return USPOOF_UNRESTRICTIVE;
229 }
230 return This->fRestrictionLevel;
231 }
232
233 U_CAPI void U_EXPORT2
uspoof_setAllowedLocales(USpoofChecker * sc,const char * localesList,UErrorCode * status)234 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
235 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
236 if (This == nullptr) {
237 return;
238 }
239 This->setAllowedLocales(localesList, *status);
240 }
241
242 U_CAPI const char * U_EXPORT2
uspoof_getAllowedLocales(USpoofChecker * sc,UErrorCode * status)243 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
244 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
245 if (This == nullptr) {
246 return nullptr;
247 }
248 return This->getAllowedLocales(*status);
249 }
250
251
252 U_CAPI const USet * U_EXPORT2
uspoof_getAllowedChars(const USpoofChecker * sc,UErrorCode * status)253 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
254 const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
255 return result->toUSet();
256 }
257
258 U_CAPI const UnicodeSet * U_EXPORT2
uspoof_getAllowedUnicodeSet(const USpoofChecker * sc,UErrorCode * status)259 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
260 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
261 if (This == nullptr) {
262 return nullptr;
263 }
264 return This->fAllowedCharsSet;
265 }
266
267
268 U_CAPI void U_EXPORT2
uspoof_setAllowedChars(USpoofChecker * sc,const USet * chars,UErrorCode * status)269 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
270 const UnicodeSet *set = UnicodeSet::fromUSet(chars);
271 uspoof_setAllowedUnicodeSet(sc, set, status);
272 }
273
274
275 U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker * sc,const UnicodeSet * chars,UErrorCode * status)276 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
277 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
278 if (This == nullptr) {
279 return;
280 }
281 if (chars->isBogus()) {
282 *status = U_ILLEGAL_ARGUMENT_ERROR;
283 return;
284 }
285 UnicodeSet *clonedSet = chars->clone();
286 if (clonedSet == nullptr || clonedSet->isBogus()) {
287 *status = U_MEMORY_ALLOCATION_ERROR;
288 return;
289 }
290 clonedSet->freeze();
291 delete This->fAllowedCharsSet;
292 This->fAllowedCharsSet = clonedSet;
293 This->fChecks |= USPOOF_CHAR_LIMIT;
294 }
295
296
297 U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker * sc,const char16_t * id,int32_t length,int32_t * position,UErrorCode * status)298 uspoof_check(const USpoofChecker *sc,
299 const char16_t *id, int32_t length,
300 int32_t *position,
301 UErrorCode *status) {
302
303 // Backwards compatibility:
304 if (position != nullptr) {
305 *position = 0;
306 }
307
308 // Delegate to uspoof_check2
309 return uspoof_check2(sc, id, length, nullptr, status);
310 }
311
312
313 U_CAPI int32_t U_EXPORT2
uspoof_check2(const USpoofChecker * sc,const char16_t * id,int32_t length,USpoofCheckResult * checkResult,UErrorCode * status)314 uspoof_check2(const USpoofChecker *sc,
315 const char16_t* id, int32_t length,
316 USpoofCheckResult* checkResult,
317 UErrorCode *status) {
318
319 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
320 if (This == nullptr) {
321 return 0;
322 }
323 if (length < -1) {
324 *status = U_ILLEGAL_ARGUMENT_ERROR;
325 return 0;
326 }
327 UnicodeString idStr((length == -1), id, length); // Aliasing constructor.
328 int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
329 return result;
330 }
331
332
333 U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker * sc,const char * id,int32_t length,int32_t * position,UErrorCode * status)334 uspoof_checkUTF8(const USpoofChecker *sc,
335 const char *id, int32_t length,
336 int32_t *position,
337 UErrorCode *status) {
338
339 // Backwards compatibility:
340 if (position != nullptr) {
341 *position = 0;
342 }
343
344 // Delegate to uspoof_check2
345 return uspoof_check2UTF8(sc, id, length, nullptr, status);
346 }
347
348
349 U_CAPI int32_t U_EXPORT2
uspoof_check2UTF8(const USpoofChecker * sc,const char * id,int32_t length,USpoofCheckResult * checkResult,UErrorCode * status)350 uspoof_check2UTF8(const USpoofChecker *sc,
351 const char *id, int32_t length,
352 USpoofCheckResult* checkResult,
353 UErrorCode *status) {
354
355 if (U_FAILURE(*status)) {
356 return 0;
357 }
358 UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
359 int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
360 return result;
361 }
362
363
364 U_CAPI int32_t U_EXPORT2
uspoof_areConfusable(const USpoofChecker * sc,const char16_t * id1,int32_t length1,const char16_t * id2,int32_t length2,UErrorCode * status)365 uspoof_areConfusable(const USpoofChecker *sc,
366 const char16_t *id1, int32_t length1,
367 const char16_t *id2, int32_t length2,
368 UErrorCode *status) {
369 SpoofImpl::validateThis(sc, *status);
370 if (U_FAILURE(*status)) {
371 return 0;
372 }
373 if (length1 < -1 || length2 < -1) {
374 *status = U_ILLEGAL_ARGUMENT_ERROR;
375 return 0;
376 }
377
378 UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor
379 UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor
380 return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
381 }
382
383
384 U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUTF8(const USpoofChecker * sc,const char * id1,int32_t length1,const char * id2,int32_t length2,UErrorCode * status)385 uspoof_areConfusableUTF8(const USpoofChecker *sc,
386 const char *id1, int32_t length1,
387 const char *id2, int32_t length2,
388 UErrorCode *status) {
389 SpoofImpl::validateThis(sc, *status);
390 if (U_FAILURE(*status)) {
391 return 0;
392 }
393 if (length1 < -1 || length2 < -1) {
394 *status = U_ILLEGAL_ARGUMENT_ERROR;
395 return 0;
396 }
397 UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1))));
398 UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2))));
399 int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
400 return results;
401 }
402
403
404 U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id1,const icu::UnicodeString & id2,UErrorCode * status)405 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
406 const icu::UnicodeString &id1,
407 const icu::UnicodeString &id2,
408 UErrorCode *status) {
409 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
410 if (U_FAILURE(*status)) {
411 return 0;
412 }
413 //
414 // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
415 // and for definitions of the types (single, whole, mixed-script) of confusables.
416
417 // We only care about a few of the check flags. Ignore the others.
418 // If no tests relevant to this function have been specified, return an error.
419 // TODO: is this really the right thing to do? It's probably an error on the caller's part,
420 // but logically we would just return 0 (no error).
421 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
422 *status = U_INVALID_STATE_ERROR;
423 return 0;
424 }
425
426 // Compute the skeletons and check for confusability.
427 UnicodeString id1Skeleton;
428 uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
429 UnicodeString id2Skeleton;
430 uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
431 if (U_FAILURE(*status)) { return 0; }
432 if (id1Skeleton != id2Skeleton) {
433 return 0;
434 }
435
436 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes
437 // of confusables according to UTS 39 section 4.
438 // Start by computing the resolved script sets of id1 and id2.
439 ScriptSet id1RSS;
440 This->getResolvedScriptSet(id1, id1RSS, *status);
441 ScriptSet id2RSS;
442 This->getResolvedScriptSet(id2, id2RSS, *status);
443
444 // Turn on all applicable flags
445 int32_t result = 0;
446 if (id1RSS.intersects(id2RSS)) {
447 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
448 } else {
449 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
450 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
451 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
452 }
453 }
454
455 // Turn off flags that the user doesn't want
456 if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
457 result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
458 }
459 if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
460 result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
461 }
462 if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
463 result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
464 }
465
466 return result;
467 }
468
uspoof_areBidiConfusable(const USpoofChecker * sc,UBiDiDirection direction,const char16_t * id1,int32_t length1,const char16_t * id2,int32_t length2,UErrorCode * status)469 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
470 const char16_t *id1, int32_t length1,
471 const char16_t *id2, int32_t length2,
472 UErrorCode *status) {
473 UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
474 UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
475 if (id1Str.isBogus() || id2Str.isBogus()) {
476 *status = U_ILLEGAL_ARGUMENT_ERROR;
477 return 0;
478 }
479 return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
480 }
481
uspoof_areBidiConfusableUTF8(const USpoofChecker * sc,UBiDiDirection direction,const char * id1,int32_t length1,const char * id2,int32_t length2,UErrorCode * status)482 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
483 const char *id1, int32_t length1, const char *id2,
484 int32_t length2, UErrorCode *status) {
485 if (length1 < -1 || length2 < -1) {
486 *status = U_ILLEGAL_ARGUMENT_ERROR;
487 return 0;
488 }
489 UnicodeString id1Str = UnicodeString::fromUTF8(
490 StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
491 UnicodeString id2Str = UnicodeString::fromUTF8(
492 StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
493 return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
494 }
495
uspoof_areBidiConfusableUnicodeString(const USpoofChecker * sc,UBiDiDirection direction,const icu::UnicodeString & id1,const icu::UnicodeString & id2,UErrorCode * status)496 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
497 UBiDiDirection direction,
498 const icu::UnicodeString &id1,
499 const icu::UnicodeString &id2,
500 UErrorCode *status) {
501 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
502 if (U_FAILURE(*status)) {
503 return 0;
504 }
505 //
506 // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
507 // and for definitions of the types (single, whole, mixed-script) of confusables.
508
509 // We only care about a few of the check flags. Ignore the others.
510 // If no tests relevant to this function have been specified, return an error.
511 // TODO: is this really the right thing to do? It's probably an error on the caller's part,
512 // but logically we would just return 0 (no error).
513 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
514 *status = U_INVALID_STATE_ERROR;
515 return 0;
516 }
517
518 // Compute the skeletons and check for confusability.
519 UnicodeString id1Skeleton;
520 uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
521 UnicodeString id2Skeleton;
522 uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
523 if (U_FAILURE(*status)) {
524 return 0;
525 }
526 if (id1Skeleton != id2Skeleton) {
527 return 0;
528 }
529
530 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate
531 // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
532 // of id1 and id2.
533 ScriptSet id1RSS;
534 This->getResolvedScriptSet(id1, id1RSS, *status);
535 ScriptSet id2RSS;
536 This->getResolvedScriptSet(id2, id2RSS, *status);
537
538 // Turn on all applicable flags
539 uint32_t result = 0;
540 if (id1RSS.intersects(id2RSS)) {
541 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
542 } else {
543 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
544 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
545 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
546 }
547 }
548
549 // Turn off flags that the user doesn't want
550 return result & This->fChecks;
551 }
552
553
554 U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id,int32_t * position,UErrorCode * status)555 uspoof_checkUnicodeString(const USpoofChecker *sc,
556 const icu::UnicodeString &id,
557 int32_t *position,
558 UErrorCode *status) {
559
560 // Backwards compatibility:
561 if (position != nullptr) {
562 *position = 0;
563 }
564
565 // Delegate to uspoof_check2
566 return uspoof_check2UnicodeString(sc, id, nullptr, status);
567 }
568
569 namespace {
570
checkImpl(const SpoofImpl * This,const UnicodeString & id,CheckResult * checkResult,UErrorCode * status)571 int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
572 U_ASSERT(This != nullptr);
573 U_ASSERT(checkResult != nullptr);
574 checkResult->clear();
575 int32_t result = 0;
576
577 if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
578 URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
579 if (idRestrictionLevel > This->fRestrictionLevel) {
580 result |= USPOOF_RESTRICTION_LEVEL;
581 }
582 checkResult->fRestrictionLevel = idRestrictionLevel;
583 }
584
585 if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
586 UnicodeSet numerics;
587 This->getNumerics(id, numerics, *status);
588 if (numerics.size() > 1) {
589 result |= USPOOF_MIXED_NUMBERS;
590 }
591 checkResult->fNumerics = numerics; // UnicodeSet::operator=
592 }
593
594 if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
595 int32_t index = This->findHiddenOverlay(id, *status);
596 if (index != -1) {
597 result |= USPOOF_HIDDEN_OVERLAY;
598 }
599 }
600
601
602 if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
603 int32_t i;
604 UChar32 c;
605 int32_t length = id.length();
606 for (i=0; i<length ;) {
607 c = id.char32At(i);
608 i += U16_LENGTH(c);
609 if (!This->fAllowedCharsSet->contains(c)) {
610 result |= USPOOF_CHAR_LIMIT;
611 break;
612 }
613 }
614 }
615
616 if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
617 // This check needs to be done on NFD input
618 UnicodeString nfdText;
619 gNfdNormalizer->normalize(id, nfdText, *status);
620 int32_t nfdLength = nfdText.length();
621
622 // scan for more than one occurrence of the same non-spacing mark
623 // in a sequence of non-spacing marks.
624 int32_t i;
625 UChar32 c;
626 UChar32 firstNonspacingMark = 0;
627 UBool haveMultipleMarks = false;
628 UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
629
630 for (i=0; i<nfdLength ;) {
631 c = nfdText.char32At(i);
632 i += U16_LENGTH(c);
633 if (u_charType(c) != U_NON_SPACING_MARK) {
634 firstNonspacingMark = 0;
635 if (haveMultipleMarks) {
636 marksSeenSoFar.clear();
637 haveMultipleMarks = false;
638 }
639 continue;
640 }
641 if (firstNonspacingMark == 0) {
642 firstNonspacingMark = c;
643 continue;
644 }
645 if (!haveMultipleMarks) {
646 marksSeenSoFar.add(firstNonspacingMark);
647 haveMultipleMarks = true;
648 }
649 if (marksSeenSoFar.contains(c)) {
650 // report the error, and stop scanning.
651 // No need to find more than the first failure.
652 result |= USPOOF_INVISIBLE;
653 break;
654 }
655 marksSeenSoFar.add(c);
656 }
657 }
658
659 checkResult->fChecks = result;
660 return checkResult->toCombinedBitmask(This->fChecks);
661 }
662
663 } // namespace
664
665 U_CAPI int32_t U_EXPORT2
uspoof_check2UnicodeString(const USpoofChecker * sc,const icu::UnicodeString & id,USpoofCheckResult * checkResult,UErrorCode * status)666 uspoof_check2UnicodeString(const USpoofChecker *sc,
667 const icu::UnicodeString &id,
668 USpoofCheckResult* checkResult,
669 UErrorCode *status) {
670 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
671 if (This == nullptr) {
672 return false;
673 }
674
675 if (checkResult != nullptr) {
676 CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
677 if (ThisCheckResult == nullptr) {
678 return false;
679 }
680 return checkImpl(This, id, ThisCheckResult, status);
681 } else {
682 // Stack-allocate the checkResult since this method doesn't return it
683 CheckResult stackCheckResult;
684 return checkImpl(This, id, &stackCheckResult, status);
685 }
686 }
687
688
689 U_CAPI int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker * sc,uint32_t type,const char16_t * id,int32_t length,char16_t * dest,int32_t destCapacity,UErrorCode * status)690 uspoof_getSkeleton(const USpoofChecker *sc,
691 uint32_t type,
692 const char16_t *id, int32_t length,
693 char16_t *dest, int32_t destCapacity,
694 UErrorCode *status) {
695
696 SpoofImpl::validateThis(sc, *status);
697 if (U_FAILURE(*status)) {
698 return 0;
699 }
700 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
701 *status = U_ILLEGAL_ARGUMENT_ERROR;
702 return 0;
703 }
704
705 UnicodeString idStr((length==-1), id, length); // Aliasing constructor
706 UnicodeString destStr;
707 uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
708 destStr.extract(dest, destCapacity, *status);
709 return destStr.length();
710 }
711
uspoof_getBidiSkeleton(const USpoofChecker * sc,UBiDiDirection direction,const UChar * id,int32_t length,UChar * dest,int32_t destCapacity,UErrorCode * status)712 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
713 const UChar *id, int32_t length, UChar *dest,
714 int32_t destCapacity, UErrorCode *status) {
715 UnicodeString idStr((length == -1), id, length); // Aliasing constructor
716 if (idStr.isBogus()) {
717 *status = U_ILLEGAL_ARGUMENT_ERROR;
718 return 0;
719 }
720 UnicodeString destStr;
721 uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status);
722 return destStr.extract(dest, destCapacity, *status);
723 }
724
725
726
uspoof_getBidiSkeletonUnicodeString(const USpoofChecker * sc,UBiDiDirection direction,const UnicodeString & id,UnicodeString & dest,UErrorCode * status)727 U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
728 UBiDiDirection direction,
729 const UnicodeString &id,
730 UnicodeString &dest,
731 UErrorCode *status) {
732 dest.remove();
733 if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
734 *status = U_ILLEGAL_ARGUMENT_ERROR;
735 return dest;
736 }
737 UBiDi *bidi = ubidi_open();
738 ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
739 /*embeddingLevels*/ nullptr, status);
740 if (U_FAILURE(*status)) {
741 ubidi_close(bidi);
742 return dest;
743 }
744 UnicodeString reordered;
745 int32_t const size = ubidi_getProcessedLength(bidi);
746 UChar* const reorderedBuffer = reordered.getBuffer(size);
747 if (reorderedBuffer == nullptr) {
748 *status = U_MEMORY_ALLOCATION_ERROR;
749 ubidi_close(bidi);
750 return dest;
751 }
752 ubidi_writeReordered(bidi, reorderedBuffer, size,
753 UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
754 reordered.releaseBuffer(size);
755 ubidi_close(bidi);
756
757 if (U_FAILURE(*status)) {
758 return dest;
759 }
760
761 // The type parameter is deprecated since ICU 58; any number may be passed.
762 constexpr uint32_t deprecatedType = 58;
763 return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
764 }
765
766
767
768 U_I18N_API UnicodeString & U_EXPORT2
uspoof_getSkeletonUnicodeString(const USpoofChecker * sc,uint32_t,const UnicodeString & id,UnicodeString & dest,UErrorCode * status)769 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
770 uint32_t /*type*/,
771 const UnicodeString &id,
772 UnicodeString &dest,
773 UErrorCode *status) {
774 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
775 if (U_FAILURE(*status)) {
776 return dest;
777 }
778
779 UnicodeString nfdId;
780 gNfdNormalizer->normalize(id, nfdId, *status);
781
782 // Apply the skeleton mapping to the NFD normalized input string
783 // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
784 int32_t inputIndex = 0;
785 UnicodeString skelStr;
786 int32_t normalizedLen = nfdId.length();
787 for (inputIndex=0; inputIndex < normalizedLen; ) {
788 UChar32 c = nfdId.char32At(inputIndex);
789 inputIndex += U16_LENGTH(c);
790 if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
791 This->fSpoofData->confusableLookup(c, skelStr);
792 }
793 }
794
795 gNfdNormalizer->normalize(skelStr, dest, *status);
796 return dest;
797 }
798
uspoof_getSkeletonUTF8(const USpoofChecker * sc,uint32_t type,const char * id,int32_t length,char * dest,int32_t destCapacity,UErrorCode * status)799 U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
800 int32_t length, char *dest, int32_t destCapacity,
801 UErrorCode *status) {
802 SpoofImpl::validateThis(sc, *status);
803 if (U_FAILURE(*status)) {
804 return 0;
805 }
806 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
807 *status = U_ILLEGAL_ARGUMENT_ERROR;
808 return 0;
809 }
810
811 UnicodeString srcStr = UnicodeString::fromUTF8(
812 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
813 UnicodeString destStr;
814 uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
815 if (U_FAILURE(*status)) {
816 return 0;
817 }
818
819 int32_t lengthInUTF8 = 0;
820 u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
821 return lengthInUTF8;
822 }
823
uspoof_getBidiSkeletonUTF8(const USpoofChecker * sc,UBiDiDirection direction,const char * id,int32_t length,char * dest,int32_t destCapacity,UErrorCode * status)824 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
825 const char *id, int32_t length, char *dest,
826 int32_t destCapacity, UErrorCode *status) {
827 if (length < -1) {
828 *status = U_ILLEGAL_ARGUMENT_ERROR;
829 return 0;
830 }
831
832 UnicodeString srcStr = UnicodeString::fromUTF8(
833 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
834 UnicodeString destStr;
835 uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status);
836 if (U_FAILURE(*status)) {
837 return 0;
838 }
839
840 int32_t lengthInUTF8 = 0;
841 u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
842 return lengthInUTF8;
843 }
844
845
846 U_CAPI int32_t U_EXPORT2
uspoof_serialize(USpoofChecker * sc,void * buf,int32_t capacity,UErrorCode * status)847 uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
848 SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
849 if (This == nullptr) {
850 U_ASSERT(U_FAILURE(*status));
851 return 0;
852 }
853
854 return This->fSpoofData->serialize(buf, capacity, *status);
855 }
856
857 U_CAPI const USet * U_EXPORT2
uspoof_getInclusionSet(UErrorCode * status)858 uspoof_getInclusionSet(UErrorCode *status) {
859 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
860 return gInclusionSet->toUSet();
861 }
862
863 U_CAPI const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode * status)864 uspoof_getRecommendedSet(UErrorCode *status) {
865 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
866 return gRecommendedSet->toUSet();
867 }
868
869 U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getInclusionUnicodeSet(UErrorCode * status)870 uspoof_getInclusionUnicodeSet(UErrorCode *status) {
871 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
872 return gInclusionSet;
873 }
874
875 U_I18N_API const UnicodeSet * U_EXPORT2
uspoof_getRecommendedUnicodeSet(UErrorCode * status)876 uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
877 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
878 return gRecommendedSet;
879 }
880
881 //------------------
882 // CheckResult APIs
883 //------------------
884
885 U_CAPI USpoofCheckResult* U_EXPORT2
uspoof_openCheckResult(UErrorCode * status)886 uspoof_openCheckResult(UErrorCode *status) {
887 CheckResult* checkResult = new CheckResult();
888 if (checkResult == nullptr) {
889 *status = U_MEMORY_ALLOCATION_ERROR;
890 return nullptr;
891 }
892 return checkResult->asUSpoofCheckResult();
893 }
894
895 U_CAPI void U_EXPORT2
uspoof_closeCheckResult(USpoofCheckResult * checkResult)896 uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
897 UErrorCode status = U_ZERO_ERROR;
898 CheckResult* This = CheckResult::validateThis(checkResult, status);
899 delete This;
900 }
901
902 U_CAPI int32_t U_EXPORT2
uspoof_getCheckResultChecks(const USpoofCheckResult * checkResult,UErrorCode * status)903 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
904 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
905 if (U_FAILURE(*status)) { return 0; }
906 return This->fChecks;
907 }
908
909 U_CAPI URestrictionLevel U_EXPORT2
uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult * checkResult,UErrorCode * status)910 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
911 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
912 if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
913 return This->fRestrictionLevel;
914 }
915
916 U_CAPI const USet* U_EXPORT2
uspoof_getCheckResultNumerics(const USpoofCheckResult * checkResult,UErrorCode * status)917 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
918 const CheckResult* This = CheckResult::validateThis(checkResult, *status);
919 if (U_FAILURE(*status)) { return nullptr; }
920 return This->fNumerics.toUSet();
921 }
922
923
924
925 #endif // !UCONFIG_NO_NORMALIZATION
926