1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 2008-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * file name: uspoof_conf.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2009Jan05 16 * created by: Andy Heninger 17 * 18 * Internal classes for compiling confusable data into its binary (runtime) form. 19 */ 20 21 #ifndef __USPOOF_BUILDCONF_H__ 22 #define __USPOOF_BUILDCONF_H__ 23 24 #include "unicode/utypes.h" 25 26 #if !UCONFIG_NO_NORMALIZATION 27 28 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 29 30 #include "unicode/uregex.h" 31 #include "uhash.h" 32 #include "uspoof_impl.h" 33 34 U_NAMESPACE_BEGIN 35 36 // SPUString 37 // Holds a string that is the result of one of the mappings defined 38 // by the confusable mapping data (confusables.txt from Unicode.org) 39 // Instances of SPUString exist during the compilation process only. 40 41 struct SPUString : public UMemory { 42 LocalPointer<UnicodeString> fStr; // The actual string. 43 int32_t fCharOrStrTableIndex; // Index into the final runtime data for this 44 // string (or, for length 1, the single string char 45 // itself, there being no string table entry for it.) 46 47 SPUString(LocalPointer<UnicodeString> s); 48 ~SPUString(); 49 }; 50 51 52 // String Pool A utility class for holding the strings that are the result of 53 // the spoof mappings. These strings will utimately end up in the 54 // run-time String Table. 55 // This is sort of like a sorted set of strings, except that ICU's anemic 56 // built-in collections don't support those, so it is implemented with a 57 // combination of a uhash and a UVector. 58 59 60 class SPUStringPool : public UMemory { 61 public: 62 SPUStringPool(UErrorCode &status); 63 ~SPUStringPool(); 64 65 // Add a string. Return the string from the table. 66 // If the input parameter string is already in the table, delete the 67 // input parameter and return the existing string. 68 SPUString *addString(UnicodeString *src, UErrorCode &status); 69 70 71 // Get the n-th string in the collection. 72 SPUString *getByIndex(int32_t i); 73 74 // Sort the contents; affects the ordering of getByIndex(). 75 void sort(UErrorCode &status); 76 77 int32_t size(); 78 79 private: 80 UVector *fVec; // Elements are SPUString * 81 UHashtable *fHash; // Key: UnicodeString Value: SPUString 82 }; 83 84 85 // class ConfusabledataBuilder 86 // An instance of this class exists while the confusable data is being built from source. 87 // It encapsulates the intermediate data structures that are used for building. 88 // It exports one static function, to do a confusable data build. 89 90 class ConfusabledataBuilder : public UMemory { 91 private: 92 SpoofImpl *fSpoofImpl; 93 UChar *fInput; 94 UHashtable *fTable; 95 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. 96 97 // The binary data is first assembled into the following four collections, then 98 // copied to its final raw-memory destination. 99 UVector *fKeyVec; 100 UVector *fValueVec; 101 UnicodeString *fStringTable; 102 103 SPUStringPool *stringPool; 104 URegularExpression *fParseLine; 105 URegularExpression *fParseHexNum; 106 int32_t fLineNum; 107 108 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); 109 ~ConfusabledataBuilder(); 110 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); 111 112 // Add an entry to the key and value tables being built 113 // input: data from SLTable, MATable, etc. 114 // output: entry added to fKeyVec and fValueVec 115 void addKeyEntry(UChar32 keyChar, // The key character 116 UHashtable *table, // The table, one of SATable, MATable, etc. 117 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. 118 UErrorCode &status); 119 120 // From an index into fKeyVec & fValueVec 121 // get a UnicodeString with the corresponding mapping. 122 UnicodeString getMapping(int32_t index); 123 124 // Populate the final binary output data array with the compiled data. 125 void outputData(UErrorCode &status); 126 127 public: 128 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, 129 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); 130 }; 131 U_NAMESPACE_END 132 133 #endif 134 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 135 #endif // __USPOOF_BUILDCONF_H__ 136