1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: nptrans.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_TRANSLITERATION
20 #if !UCONFIG_NO_IDNA
21
22 #include "nptrans.h"
23 #include "unicode/resbund.h"
24 #include "unicode/uniset.h"
25 #include "sprpimpl.h"
26 #include "cmemory.h"
27 #include "ustr_imp.h"
28 #include "intltest.h"
29
30 #ifdef DEBUG
31 #include <stdio.h>
32 #endif
33
// Out-of-class definition of the static class-ID member, initialised to 0.
// NOTE(review): presumably declared with the class in nptrans.h and used by
// address as an ICU-style unique class identifier — confirm against the header.
34 const char NamePrepTransform::fgClassID=0;
35
36 //Factory method
createInstance(UParseError & parseError,UErrorCode & status)37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
38 NamePrepTransform* transform = new NamePrepTransform(parseError, status);
39 if(U_FAILURE(status)){
40 delete transform;
41 return NULL;
42 }
43 return transform;
44 }
45
46 //constructor
NamePrepTransform(UParseError & parseError,UErrorCode & status)47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
48 : unassigned(), prohibited(), labelSeparatorSet(){
49
50 mapping = NULL;
51 bundle = NULL;
52
53
54 const char* testDataName = IntlTest::loadTestData(status);
55
56 if(U_FAILURE(status)){
57 return;
58 }
59
60 bundle = ures_openDirect(testDataName,"idna_rules",&status);
61
62 if(bundle != NULL && U_SUCCESS(status)){
63 // create the mapping transliterator
64 int32_t ruleLen = 0;
65 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
66 int32_t mapRuleLen = 0;
67 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
68 UnicodeString rule(mapRuleUChar, mapRuleLen);
69 rule.append(ruleUChar, ruleLen);
70
71 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
72 UTRANS_FORWARD, parseError,status);
73 if(U_FAILURE(status)) {
74 return;
75 }
76
77 //create the unassigned set
78 int32_t patternLen =0;
79 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
80 unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
81
82 //create prohibited set
83 patternLen=0;
84 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
85 UnicodeString test(pattern,patternLen);
86 prohibited.applyPattern(test,status);
87 #ifdef DEBUG
88 if(U_FAILURE(status)){
89 printf("Construction of Unicode set failed\n");
90 }
91
92 if(U_SUCCESS(status)){
93 if(prohibited.contains((UChar) 0x644)){
94 printf("The string contains 0x644 ... damn !!\n");
95 }
96 UnicodeString temp;
97 prohibited.toPattern(temp,TRUE);
98
99 for(int32_t i=0;i<temp.length();i++){
100 printf("%c", (char)temp.charAt(i));
101 }
102 printf("\n");
103 }
104 #endif
105
106 //create label separator set
107 patternLen=0;
108 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
109 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
110 }
111
112 if(U_SUCCESS(status) &&
113 (mapping == NULL)
114 ){
115 status = U_MEMORY_ALLOCATION_ERROR;
116 delete mapping;
117 ures_close(bundle);
118 mapping = NULL;
119 bundle = NULL;
120 }
121
122 }
123
124
isProhibited(UChar32 ch)125 UBool NamePrepTransform::isProhibited(UChar32 ch){
126 return (UBool)(ch != ASCII_SPACE);
127 }
128
~NamePrepTransform()129 NamePrepTransform::~NamePrepTransform(){
130 delete mapping;
131 mapping = NULL;
132
133 //close the bundle
134 ures_close(bundle);
135 bundle = NULL;
136 }
137
138
map(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError *,UErrorCode & status)139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
140 UChar* dest, int32_t destCapacity,
141 UBool allowUnassigned,
142 UParseError* /*parseError*/,
143 UErrorCode& status ){
144
145 if(U_FAILURE(status)){
146 return 0;
147 }
148 //check arguments
149 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
150 status=U_ILLEGAL_ARGUMENT_ERROR;
151 return 0;
152 }
153
154 UnicodeString rsource(src,srcLength);
155 // map the code points
156 // transliteration also performs NFKC
157 mapping->transliterate(rsource);
158
159 const UChar* buffer = rsource.getBuffer();
160 int32_t bufLen = rsource.length();
161 // check if unassigned
162 if(allowUnassigned == FALSE){
163 int32_t bufIndex=0;
164 UChar32 ch =0 ;
165 for(;bufIndex<bufLen;){
166 U16_NEXT(buffer, bufIndex, bufLen, ch);
167 if(unassigned.contains(ch)){
168 status = U_IDNA_UNASSIGNED_ERROR;
169 return 0;
170 }
171 }
172 }
173 // check if there is enough room in the output
174 if(bufLen < destCapacity){
175 uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
176 }
177
178 return u_terminateUChars(dest, destCapacity, bufLen, &status);
179 }
180
181
182 #define MAX_BUFFER_SIZE 300
183
process(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError * parseError,UErrorCode & status)184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
185 UChar* dest, int32_t destCapacity,
186 UBool allowUnassigned,
187 UParseError* parseError,
188 UErrorCode& status ){
189 // check error status
190 if(U_FAILURE(status)){
191 return 0;
192 }
193
194 //check arguments
195 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
196 status=U_ILLEGAL_ARGUMENT_ERROR;
197 return 0;
198 }
199
200 UnicodeString b1String;
201 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
202 int32_t b1Len;
203
204 int32_t b1Index = 0;
205 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
206 UBool leftToRight=FALSE, rightToLeft=FALSE;
207
208 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
209 b1String.releaseBuffer(b1Len);
210
211 if(status == U_BUFFER_OVERFLOW_ERROR){
212 // redo processing of string
213 /* we do not have enough room so grow the buffer*/
214 b1 = b1String.getBuffer(b1Len);
215 status = U_ZERO_ERROR; // reset error
216 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
217 b1String.releaseBuffer(b1Len);
218 }
219
220 if(U_FAILURE(status)){
221 b1Len = 0;
222 goto CLEANUP;
223 }
224
225
226 for(; b1Index<b1Len; ){
227
228 UChar32 ch = 0;
229
230 U16_NEXT(b1, b1Index, b1Len, ch);
231
232 if(prohibited.contains(ch) && ch!=0x0020){
233 status = U_IDNA_PROHIBITED_ERROR;
234 b1Len = 0;
235 goto CLEANUP;
236 }
237
238 direction = u_charDirection(ch);
239 if(firstCharDir==U_CHAR_DIRECTION_COUNT){
240 firstCharDir = direction;
241 }
242 if(direction == U_LEFT_TO_RIGHT){
243 leftToRight = TRUE;
244 }
245 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
246 rightToLeft = TRUE;
247 }
248 }
249
250 // satisfy 2
251 if( leftToRight == TRUE && rightToLeft == TRUE){
252 status = U_IDNA_CHECK_BIDI_ERROR;
253 b1Len = 0;
254 goto CLEANUP;
255 }
256
257 //satisfy 3
258 if( rightToLeft == TRUE &&
259 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
260 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
261 ){
262 status = U_IDNA_CHECK_BIDI_ERROR;
263 return FALSE;
264 }
265
266 if(b1Len <= destCapacity){
267 uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
268 }
269
270 CLEANUP:
271 return u_terminateUChars(dest, destCapacity, b1Len, &status);
272 }
273
isLabelSeparator(UChar32 ch,UErrorCode & status)274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
275 // check error status
276 if(U_FAILURE(status)){
277 return FALSE;
278 }
279
280 return labelSeparatorSet.contains(ch);
281 }
282
283 #endif /* #if !UCONFIG_NO_IDNA */
284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
285