1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: nptrans.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003feb1
16 * created by: Ram Viswanadha
17 */
18
19 #ifndef NPTRANS_H
20 #define NPTRANS_H
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_IDNA
25 #if !UCONFIG_NO_TRANSLITERATION
26
27 #include "unicode/uniset.h"
28 #include "unicode/ures.h"
29 #include "unicode/translit.h"
30
31 #include "intltest.h"
32
33
34 #define ASCII_SPACE 0x0020
35
36 class NamePrepTransform {
37
38 private :
39 Transliterator *mapping;
40 UnicodeSet unassigned;
41 UnicodeSet prohibited;
42 UnicodeSet labelSeparatorSet;
43 UResourceBundle *bundle;
44 NamePrepTransform(UParseError& parseError, UErrorCode& status);
45
46
47 public :
48
49 static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
50
51 virtual ~NamePrepTransform();
52
53
54 inline UBool isProhibited(UChar32 ch);
55
56 /**
57 * ICU "poor man's RTTI", returns a UClassID for the actual class.
58 */
getDynamicClassID()59 inline UClassID getDynamicClassID() const { return getStaticClassID(); }
60
61 /**
62 * ICU "poor man's RTTI", returns a UClassID for this class.
63 */
getStaticClassID()64 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
65
66 /**
67 * Map every character in input stream with mapping character
68 * in the mapping table and populate the output stream.
69 * For any individual character the mapping table may specify
70 * that that a character be mapped to nothing, mapped to one
71 * other character or to a string of other characters.
72 *
73 * @param src Pointer to UChar buffer containing a single label
74 * @param srcLength Number of characters in the source label
75 * @param dest Pointer to the destination buffer to receive the output
76 * @param destCapacity The capacity of destination array
77 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
78 * If TRUE unassigned values are treated as normal Unicode code point.
79 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
80 * @param status ICU error code in/out parameter.
81 * Must fulfill U_SUCCESS before the function call.
82 * @return The number of UChars in the destination buffer
83 */
84 int32_t map(const UChar* src, int32_t srcLength,
85 UChar* dest, int32_t destCapacity,
86 UBool allowUnassigned,
87 UParseError* parseError,
88 UErrorCode& status );
89
90 /**
91 * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
92 * checks for prohited and BiDi characters in the order defined by RFC 3454
93 *
94 * @param src Pointer to UChar buffer containing a single label
95 * @param srcLength Number of characters in the source label
96 * @param dest Pointer to the destination buffer to receive the output
97 * @param destCapacity The capacity of destination array
98 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
99 * If TRUE unassigned values are treated as normal Unicode code point.
100 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
101 * @param status ICU error code in/out parameter.
102 * Must fulfill U_SUCCESS before the function call.
103 * @return The number of UChars in the destination buffer
104 */
105 int32_t process(const UChar* src, int32_t srcLength,
106 UChar* dest, int32_t destCapacity,
107 UBool allowUnassigned,
108 UParseError* parseError,
109 UErrorCode& status );
110
111 /**
112 * Ascertain if the given code point is a label separator as specified by IDNA
113 *
114 * @return TRUE is the code point is a label separator
115 */
116 UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
117
118 inline UBool isLDHChar(UChar32 ch);
119
120 private:
121 /**
122 * The address of this static class variable serves as this class's ID
123 * for ICU "poor man's RTTI".
124 */
125 static const char fgClassID;
126 };
127
isLDHChar(UChar32 ch)128 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
129 // high runner case
130 if(ch>0x007A){
131 return FALSE;
132 }
133 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
134 if( (ch==0x002D) ||
135 (0x0030 <= ch && ch <= 0x0039) ||
136 (0x0041 <= ch && ch <= 0x005A) ||
137 (0x0061 <= ch && ch <= 0x007A)
138 ){
139 return TRUE;
140 }
141 return FALSE;
142 }
143
144 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
145 #else
/**
 * Empty stand-in declared on the #else branch of the UCONFIG_NO_IDNA check,
 * i.e. when IDNA support is configured out. It has no members.
 */
class NamePrepTransform {};
148 #endif /* #if !UCONFIG_NO_IDNA */
149
150 #endif
151
152 /*
153 * Hey, Emacs, please set the following:
154 *
155 * Local Variables:
156 * indent-tabs-mode: nil
157 * End:
158 *
159 */
160