• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 2008-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  uspoof_conf.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2009Jan05
16 *   created by: Andy Heninger
17 *
18 *   Internal classes for compiling confusable data into its binary (runtime) form.
19 */
20 
21 #ifndef __USPOOF_BUILDCONF_H__
22 #define __USPOOF_BUILDCONF_H__
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_NORMALIZATION
27 
28 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
29 
30 #include "unicode/uregex.h"
31 #include "uhash.h"
32 #include "uspoof_impl.h"
33 
34 U_NAMESPACE_BEGIN
35 
36 // SPUString
37 //              Holds a string that is the result of one of the mappings defined
38 //              by the confusable mapping data (confusables.txt from Unicode.org).
39 //              Instances of SPUString exist during the compilation process only.
40 //              The string itself is held through the LocalPointer member fStr.
41 struct SPUString : public UMemory {
42     LocalPointer<UnicodeString> fStr;     // The actual string.
43     int32_t      fCharOrStrTableIndex;    // Index into the final runtime data for this
44                                           // string (or, for length 1, the single string char
45                                           // itself, there being no string table entry for it).
46     // The constructor receives the string as a LocalPointer passed by value (presumably stored in fStr; the definition is not in this header).
47     SPUString(LocalPointer<UnicodeString> s);
48     ~SPUString();
49 };
50 
51 
52 //  String Pool   A utility class for holding the strings that are the result of
53 //                the spoof mappings.  These strings will ultimately end up in the
54 //                run-time String Table.
55 //                This is sort of like a sorted set of strings, except that ICU's anemic
56 //                built-in collections don't support those, so it is implemented with a
57 //                combination of a uhash and a UVector.
58 //                The constructor, addString() and sort() each take a UErrorCode &status
59 //                parameter (presumably following the usual ICU error-reporting convention).
60 class SPUStringPool : public UMemory {
61   public:
62     SPUStringPool(UErrorCode &status);
63     ~SPUStringPool();
64 
65     // Add a string. Return the string from the table.
66     // If the input parameter string is already in the table, delete the
67     //  input parameter and return the existing string.
68     SPUString *addString(UnicodeString *src, UErrorCode &status);
69 
70 
71     // Get the n-th string in the collection.
72     SPUString *getByIndex(int32_t i);
73 
74     // Sort the contents; affects the ordering of getByIndex().
75     void sort(UErrorCode &status);
76     // Presumably the number of strings in the collection (definition not visible here).
77     int32_t size();
78     // NOTE(review): raw pointer members plus a declared destructor, but copy operations are not deleted here - confirm instances are never copied.
79   private:
80     UVector     *fVec;    // Elements are SPUString *
81     UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
82 };
83 
84 
85 // class ConfusabledataBuilder
86 //     An instance of this class exists while the confusable data is being built from source.
87 //     It encapsulates the intermediate data structures that are used for building.
88 //     It exports one static function, to do a confusable data build.
89 //     Everything else, including the constructor and destructor, is private.
90 class ConfusabledataBuilder : public UMemory {
91   private:
92     SpoofImpl  *fSpoofImpl;
93     UChar      *fInput;
94     UHashtable *fTable;
95     UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
96 
97     // The binary data is first assembled into the following collections, then
98     //   copied to its final raw-memory destination.
99     UVector            *fKeyVec;
100     UVector            *fValueVec;
101     UnicodeString      *fStringTable;
102     // Further build-time state: a string pool, two regular expressions and a line counter (presumably used while parsing the input).
103     SPUStringPool      *stringPool;
104     URegularExpression *fParseLine;
105     URegularExpression *fParseHexNum;
106     int32_t             fLineNum;
107     // Construction, destruction and build() are private; the public static buildConfusableData() is the only entry point.
108     ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
109     ~ConfusabledataBuilder();
110     void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
111 
112     // Add an entry to the key and value tables being built
113     //   input:  data from SLTable, MATable, etc.
114     //   output:  entry added to fKeyVec and fValueVec
115     void addKeyEntry(UChar32     keyChar,     // The key character
116                      UHashtable *table,       // The table, one of SATable, MATable, etc.
117                      int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
118                      UErrorCode &status);
119 
120     // From an index into fKeyVec & fValueVec
121     //   get a UnicodeString with the corresponding mapping.
122     UnicodeString getMapping(int32_t index);
123 
124     // Populate the final binary output data array with the compiled data.
125     void outputData(UErrorCode &status);
126     // The only public member. Takes the confusables source text and its length; errorType and pe are pointer parameters (presumably outputs describing a parse failure).
127   public:
128     static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
129         int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
130 };
131 U_NAMESPACE_END
132 
133 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
134 #endif  // !UCONFIG_NO_NORMALIZATION
135 #endif  // __USPOOF_BUILDCONF_H__
136