• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2006, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  nptrans.h
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #ifndef NPTRANS_H
18 #define NPTRANS_H
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_IDNA
23 #if !UCONFIG_NO_TRANSLITERATION
24 
25 #include "unicode/uniset.h"
26 #include "unicode/ures.h"
27 #include "unicode/translit.h"
28 
29 #include "intltest.h"
30 
31 
32 #define ASCII_SPACE 0x0020
33 
34 class NamePrepTransform {
35 
36 private :
37     Transliterator *mapping;
38     UnicodeSet unassigned;
39     UnicodeSet prohibited;
40     UnicodeSet labelSeparatorSet;
41     UResourceBundle *bundle;
42     NamePrepTransform(UParseError& parseError, UErrorCode& status);
43 
44 
45 public :
46 
47     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
48 
49     virtual ~NamePrepTransform();
50 
51 
52     inline UBool isProhibited(UChar32 ch);
53 
54     /**
55      * ICU "poor man's RTTI", returns a UClassID for the actual class.
56      *
57      * @draft ICU 2.6
58      */
getDynamicClassID()59     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
60 
61     /**
62      * ICU "poor man's RTTI", returns a UClassID for this class.
63      *
64      * @draft ICU 2.6
65      */
getStaticClassID()66     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
67 
68     /**
69      * Map every character in input stream with mapping character
70      * in the mapping table and populate the output stream.
71      * For any individual character the mapping table may specify
72      * that that a character be mapped to nothing, mapped to one
73      * other character or to a string of other characters.
74      *
75      * @param src           Pointer to UChar buffer containing a single label
76      * @param srcLength     Number of characters in the source label
77      * @param dest          Pointer to the destination buffer to receive the output
78      * @param destCapacity  The capacity of destination array
79      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
80      *                          If TRUE unassigned values are treated as normal Unicode code point.
81      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
82      * @param status        ICU error code in/out parameter.
83      *                      Must fulfill U_SUCCESS before the function call.
84      * @return The number of UChars in the destination buffer
85      *
86      */
87     int32_t map(const UChar* src, int32_t srcLength,
88                         UChar* dest, int32_t destCapacity,
89                         UBool allowUnassigned,
90                         UParseError* parseError,
91                         UErrorCode& status );
92 
93     /**
94      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
95      * checks for prohited and BiDi characters in the order defined by RFC 3454
96      *
97      * @param src           Pointer to UChar buffer containing a single label
98      * @param srcLength     Number of characters in the source label
99      * @param dest          Pointer to the destination buffer to receive the output
100      * @param destCapacity  The capacity of destination array
101      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
102      *                          If TRUE unassigned values are treated as normal Unicode code point.
103      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
104      * @param status        ICU error code in/out parameter.
105      *                      Must fulfill U_SUCCESS before the function call.
106      * @return The number of UChars in the destination buffer
107      */
108     int32_t process(const UChar* src, int32_t srcLength,
109                             UChar* dest, int32_t destCapacity,
110                             UBool allowUnassigned,
111                             UParseError* parseError,
112                             UErrorCode& status );
113 
114     /**
115      * Ascertain if the given code point is a label separator as specified by IDNA
116      *
117      * @return TRUE is the code point is a label separator
118      *
119      *
120      */
121     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
122 
123 
124     inline UBool isLDHChar(UChar32 ch);
125 private:
126     /**
127      * The address of this static class variable serves as this class's ID
128      * for ICU "poor man's RTTI".
129      */
130     static const char fgClassID;
131 };
132 
isLDHChar(UChar32 ch)133 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
134     // high runner case
135     if(ch>0x007A){
136         return FALSE;
137     }
138     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
139     if( (ch==0x002D) ||
140         (0x0030 <= ch && ch <= 0x0039) ||
141         (0x0041 <= ch && ch <= 0x005A) ||
142         (0x0061 <= ch && ch <= 0x007A)
143       ){
144         return TRUE;
145     }
146     return FALSE;
147 }
148 
149 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
150 #else
/* Empty placeholder so the class name still exists when UCONFIG_NO_IDNA is set. */
class NamePrepTransform {};
153 #endif /* #if !UCONFIG_NO_IDNA */
154 
155 #endif
156 
157 /*
158  * Hey, Emacs, please set the following:
159  *
160  * Local Variables:
161  * indent-tabs-mode: nil
162  * End:
163  *
164  */
165