1 /*
2 ***************************************************************************
3 * Copyright (C) 2008-2015, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ***************************************************************************
6 * file name: uspoof_build.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2008 Dec 8
12 * created by: Andy Heninger
13 *
14 * Unicode Spoof Detection Data Builder
15 * Builder-related functions are kept in separate files so that applications not needing
16 * the builder can more easily exclude them, typically by means of static linking.
17 *
18 * There are three relatively independent sets of Spoof data,
19 * Confusables,
20 * Whole Script Confusables
21 * ID character extensions.
22 *
23 * The data tables for each are built separately, each from its own definitions
24 */
25
26 #include "unicode/utypes.h"
27 #include "unicode/uspoof.h"
28 #include "unicode/unorm.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "cmemory.h"
32 #include "uspoof_impl.h"
33 #include "uhash.h"
34 #include "uvector.h"
35 #include "uassert.h"
36 #include "uarrsort.h"
37 #include "uspoof_conf.h"
38 #include "uspoof_wsconf.h"
39
40 #if !UCONFIG_NO_NORMALIZATION
41
42 U_NAMESPACE_USE
43
44 // Defined in uspoof.cpp, initializes file-static variables.
45 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
46
47 // The main data building function
48
49 U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSource(const char * confusables,int32_t confusablesLen,const char * confusablesWholeScript,int32_t confusablesWholeScriptLen,int32_t * errorType,UParseError * pe,UErrorCode * status)50 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
51 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
52 int32_t *errorType, UParseError *pe, UErrorCode *status) {
53 uspoof_internalInitStatics(status);
54 if (U_FAILURE(*status)) {
55 return NULL;
56 }
57 #if UCONFIG_NO_REGULAR_EXPRESSIONS
58 *status = U_UNSUPPORTED_ERROR;
59 return NULL;
60 #else
61 if (errorType!=NULL) {
62 *errorType = 0;
63 }
64 if (pe != NULL) {
65 pe->line = 0;
66 pe->offset = 0;
67 pe->preContext[0] = 0;
68 pe->postContext[0] = 0;
69 }
70
71 // Set up a shell of a spoof detector, with empty data.
72 SpoofData *newSpoofData = new SpoofData(*status);
73 SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
74
75 // Compile the binary data from the source (text) format.
76 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
77 buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
78
79 if (U_FAILURE(*status)) {
80 delete This;
81 This = NULL;
82 }
83 return (USpoofChecker *)This;
84 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
85 }
86
87 #endif
88