1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: nptrans.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003feb1
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_TRANSLITERATION
22 #if !UCONFIG_NO_IDNA
23
24 #include "nptrans.h"
25 #include "unicode/resbund.h"
26 #include "unicode/uniset.h"
27 #include "sprpimpl.h"
28 #include "cmemory.h"
29 #include "ustr_imp.h"
30 #include "intltest.h"
31
32 #ifdef NPTRANS_DEBUG
33 #include <stdio.h>
34 #endif
35
36 const char NamePrepTransform::fgClassID=0;
37
38 //Factory method
createInstance(UParseError & parseError,UErrorCode & status)39 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
40 NamePrepTransform* transform = new NamePrepTransform(parseError, status);
41 if(U_FAILURE(status)){
42 delete transform;
43 return NULL;
44 }
45 return transform;
46 }
47
48 //constructor
NamePrepTransform(UParseError & parseError,UErrorCode & status)49 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
50 : unassigned(), prohibited(), labelSeparatorSet(){
51
52 mapping = NULL;
53 bundle = NULL;
54
55
56 const char* testDataName = IntlTest::loadTestData(status);
57
58 if(U_FAILURE(status)){
59 return;
60 }
61
62 bundle = ures_openDirect(testDataName,"idna_rules",&status);
63
64 if(bundle != NULL && U_SUCCESS(status)){
65 // create the mapping transliterator
66 int32_t ruleLen = 0;
67 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
68 int32_t mapRuleLen = 0;
69 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
70 UnicodeString rule(mapRuleUChar, mapRuleLen);
71 rule.append(ruleUChar, ruleLen);
72
73 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
74 UTRANS_FORWARD, parseError,status);
75 if(U_FAILURE(status)) {
76 return;
77 }
78
79 //create the unassigned set
80 int32_t patternLen =0;
81 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
82 unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
83
84 //create prohibited set
85 patternLen=0;
86 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
87 UnicodeString test(pattern,patternLen);
88 prohibited.applyPattern(test,status);
89 #ifdef NPTRANS_DEBUG
90 if(U_FAILURE(status)){
91 printf("Construction of Unicode set failed\n");
92 }
93
94 if(U_SUCCESS(status)){
95 if(prohibited.contains((UChar) 0x644)){
96 printf("The string contains 0x644 ... !!\n");
97 }
98 UnicodeString temp;
99 prohibited.toPattern(temp,TRUE);
100
101 for(int32_t i=0;i<temp.length();i++){
102 printf("%c", (char)temp.charAt(i));
103 }
104 printf("\n");
105 }
106 #endif
107
108 //create label separator set
109 patternLen=0;
110 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
111 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
112 }
113
114 if(U_SUCCESS(status) &&
115 (mapping == NULL)
116 ){
117 status = U_MEMORY_ALLOCATION_ERROR;
118 delete mapping;
119 ures_close(bundle);
120 mapping = NULL;
121 bundle = NULL;
122 }
123
124 }
125
126
isProhibited(UChar32 ch)127 UBool NamePrepTransform::isProhibited(UChar32 ch){
128 return (UBool)(ch != ASCII_SPACE);
129 }
130
~NamePrepTransform()131 NamePrepTransform::~NamePrepTransform(){
132 delete mapping;
133 mapping = NULL;
134
135 //close the bundle
136 ures_close(bundle);
137 bundle = NULL;
138 }
139
140
map(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError *,UErrorCode & status)141 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
142 UChar* dest, int32_t destCapacity,
143 UBool allowUnassigned,
144 UParseError* /*parseError*/,
145 UErrorCode& status ){
146
147 if(U_FAILURE(status)){
148 return 0;
149 }
150 //check arguments
151 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
152 status=U_ILLEGAL_ARGUMENT_ERROR;
153 return 0;
154 }
155
156 UnicodeString rsource(src,srcLength);
157 // map the code points
158 // transliteration also performs NFKC
159 mapping->transliterate(rsource);
160
161 const UChar* buffer = rsource.getBuffer();
162 int32_t bufLen = rsource.length();
163 // check if unassigned
164 if(allowUnassigned == FALSE){
165 int32_t bufIndex=0;
166 UChar32 ch =0 ;
167 for(;bufIndex<bufLen;){
168 U16_NEXT(buffer, bufIndex, bufLen, ch);
169 if(unassigned.contains(ch)){
170 status = U_IDNA_UNASSIGNED_ERROR;
171 return 0;
172 }
173 }
174 }
175 // check if there is enough room in the output
176 if(bufLen < destCapacity){
177 u_memcpy(dest, buffer, bufLen);
178 }
179
180 return u_terminateUChars(dest, destCapacity, bufLen, &status);
181 }
182
183
184 #define MAX_BUFFER_SIZE 300
185
process(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError * parseError,UErrorCode & status)186 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
187 UChar* dest, int32_t destCapacity,
188 UBool allowUnassigned,
189 UParseError* parseError,
190 UErrorCode& status ){
191 // check error status
192 if(U_FAILURE(status)){
193 return 0;
194 }
195
196 //check arguments
197 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
198 status=U_ILLEGAL_ARGUMENT_ERROR;
199 return 0;
200 }
201
202 UnicodeString b1String;
203 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
204 int32_t b1Len;
205
206 int32_t b1Index = 0;
207 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
208 UBool leftToRight=FALSE, rightToLeft=FALSE;
209
210 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
211 b1String.releaseBuffer(b1Len);
212
213 if(status == U_BUFFER_OVERFLOW_ERROR){
214 // redo processing of string
215 /* we do not have enough room so grow the buffer*/
216 b1 = b1String.getBuffer(b1Len);
217 status = U_ZERO_ERROR; // reset error
218 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
219 b1String.releaseBuffer(b1Len);
220 }
221
222 if(U_FAILURE(status)){
223 b1Len = 0;
224 goto CLEANUP;
225 }
226
227
228 for(; b1Index<b1Len; ){
229
230 UChar32 ch = 0;
231
232 U16_NEXT(b1, b1Index, b1Len, ch);
233
234 if(prohibited.contains(ch) && ch!=0x0020){
235 status = U_IDNA_PROHIBITED_ERROR;
236 b1Len = 0;
237 goto CLEANUP;
238 }
239
240 direction = u_charDirection(ch);
241 if(firstCharDir==U_CHAR_DIRECTION_COUNT){
242 firstCharDir = direction;
243 }
244 if(direction == U_LEFT_TO_RIGHT){
245 leftToRight = TRUE;
246 }
247 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
248 rightToLeft = TRUE;
249 }
250 }
251
252 // satisfy 2
253 if( leftToRight == TRUE && rightToLeft == TRUE){
254 status = U_IDNA_CHECK_BIDI_ERROR;
255 b1Len = 0;
256 goto CLEANUP;
257 }
258
259 //satisfy 3
260 if( rightToLeft == TRUE &&
261 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
262 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
263 ){
264 status = U_IDNA_CHECK_BIDI_ERROR;
265 return FALSE;
266 }
267
268 if(b1Len <= destCapacity){
269 u_memmove(dest, b1, b1Len);
270 }
271
272 CLEANUP:
273 return u_terminateUChars(dest, destCapacity, b1Len, &status);
274 }
275
isLabelSeparator(UChar32 ch,UErrorCode & status)276 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
277 // check error status
278 if(U_FAILURE(status)){
279 return FALSE;
280 }
281
282 return labelSeparatorSet.contains(ch);
283 }
284
285 #endif /* #if !UCONFIG_NO_IDNA */
286 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
287