• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2010, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  nptrans.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_TRANSLITERATION
20 #if !UCONFIG_NO_IDNA
21 
22 #include "nptrans.h"
23 #include "unicode/resbund.h"
24 #include "unicode/uniset.h"
25 #include "sprpimpl.h"
26 #include "cmemory.h"
27 #include "ustr_imp.h"
28 #include "intltest.h"
29 
30 #ifdef DEBUG
31 #include <stdio.h>
32 #endif
33 
// Out-of-class definition of the static member NamePrepTransform::fgClassID,
// initialized to 0.
// NOTE(review): presumably only the address of this byte is used as the class
// identifier (declared in nptrans.h) -- confirm against the header.
34 const char NamePrepTransform::fgClassID=0;
35 
36 //Factory method
createInstance(UParseError & parseError,UErrorCode & status)37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
38     NamePrepTransform* transform = new NamePrepTransform(parseError, status);
39     if(U_FAILURE(status)){
40         delete transform;
41         return NULL;
42     }
43     return transform;
44 }
45 
46 //constructor
NamePrepTransform(UParseError & parseError,UErrorCode & status)47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
48 : unassigned(), prohibited(), labelSeparatorSet(){
49 
50     mapping = NULL;
51     bundle = NULL;
52 
53 
54     const char* testDataName = IntlTest::loadTestData(status);
55 
56     if(U_FAILURE(status)){
57         return;
58     }
59 
60     bundle = ures_openDirect(testDataName,"idna_rules",&status);
61 
62     if(bundle != NULL && U_SUCCESS(status)){
63         // create the mapping transliterator
64         int32_t ruleLen = 0;
65         const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
66         int32_t mapRuleLen = 0;
67         const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
68         UnicodeString rule(mapRuleUChar, mapRuleLen);
69         rule.append(ruleUChar, ruleLen);
70 
71         mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
72                                                    UTRANS_FORWARD, parseError,status);
73         if(U_FAILURE(status)) {
74           return;
75         }
76 
77         //create the unassigned set
78         int32_t patternLen =0;
79         const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
80         unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
81 
82         //create prohibited set
83         patternLen=0;
84         pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
85         UnicodeString test(pattern,patternLen);
86         prohibited.applyPattern(test,status);
87 #ifdef DEBUG
88         if(U_FAILURE(status)){
89             printf("Construction of Unicode set failed\n");
90         }
91 
92         if(U_SUCCESS(status)){
93             if(prohibited.contains((UChar) 0x644)){
94                 printf("The string contains 0x644 ... damn !!\n");
95             }
96             UnicodeString temp;
97             prohibited.toPattern(temp,TRUE);
98 
99             for(int32_t i=0;i<temp.length();i++){
100                 printf("%c", (char)temp.charAt(i));
101             }
102             printf("\n");
103         }
104 #endif
105 
106         //create label separator set
107         patternLen=0;
108         pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
109         labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
110     }
111 
112     if(U_SUCCESS(status) &&
113         (mapping == NULL)
114       ){
115         status = U_MEMORY_ALLOCATION_ERROR;
116         delete mapping;
117         ures_close(bundle);
118         mapping = NULL;
119         bundle = NULL;
120     }
121 
122 }
123 
124 
isProhibited(UChar32 ch)125 UBool NamePrepTransform::isProhibited(UChar32 ch){
126     return (UBool)(ch != ASCII_SPACE);
127 }
128 
~NamePrepTransform()129 NamePrepTransform::~NamePrepTransform(){
130     delete mapping;
131     mapping = NULL;
132 
133     //close the bundle
134     ures_close(bundle);
135     bundle = NULL;
136 }
137 
138 
map(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError *,UErrorCode & status)139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
140                         UChar* dest, int32_t destCapacity,
141                         UBool allowUnassigned,
142                         UParseError* /*parseError*/,
143                         UErrorCode& status ){
144 
145     if(U_FAILURE(status)){
146         return 0;
147     }
148     //check arguments
149     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
150         status=U_ILLEGAL_ARGUMENT_ERROR;
151         return 0;
152     }
153 
154     UnicodeString rsource(src,srcLength);
155     // map the code points
156     // transliteration also performs NFKC
157     mapping->transliterate(rsource);
158 
159     const UChar* buffer = rsource.getBuffer();
160     int32_t bufLen = rsource.length();
161     // check if unassigned
162     if(allowUnassigned == FALSE){
163         int32_t bufIndex=0;
164         UChar32 ch =0 ;
165         for(;bufIndex<bufLen;){
166             U16_NEXT(buffer, bufIndex, bufLen, ch);
167             if(unassigned.contains(ch)){
168                 status = U_IDNA_UNASSIGNED_ERROR;
169                 return 0;
170             }
171         }
172     }
173     // check if there is enough room in the output
174     if(bufLen < destCapacity){
175         uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
176     }
177 
178     return u_terminateUChars(dest, destCapacity, bufLen, &status);
179 }
180 
181 
182 #define MAX_BUFFER_SIZE 300
183 
process(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError * parseError,UErrorCode & status)184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
185                                     UChar* dest, int32_t destCapacity,
186                                     UBool allowUnassigned,
187                                     UParseError* parseError,
188                                     UErrorCode& status ){
189     // check error status
190     if(U_FAILURE(status)){
191         return 0;
192     }
193 
194     //check arguments
195     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
196         status=U_ILLEGAL_ARGUMENT_ERROR;
197         return 0;
198     }
199 
200     UnicodeString b1String;
201     UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
202     int32_t b1Len;
203 
204     int32_t b1Index = 0;
205     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
206     UBool leftToRight=FALSE, rightToLeft=FALSE;
207 
208     b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
209     b1String.releaseBuffer(b1Len);
210 
211     if(status == U_BUFFER_OVERFLOW_ERROR){
212         // redo processing of string
213         /* we do not have enough room so grow the buffer*/
214         b1 = b1String.getBuffer(b1Len);
215         status = U_ZERO_ERROR; // reset error
216         b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
217         b1String.releaseBuffer(b1Len);
218     }
219 
220     if(U_FAILURE(status)){
221         b1Len = 0;
222         goto CLEANUP;
223     }
224 
225 
226     for(; b1Index<b1Len; ){
227 
228         UChar32 ch = 0;
229 
230         U16_NEXT(b1, b1Index, b1Len, ch);
231 
232         if(prohibited.contains(ch) && ch!=0x0020){
233             status = U_IDNA_PROHIBITED_ERROR;
234             b1Len = 0;
235             goto CLEANUP;
236         }
237 
238         direction = u_charDirection(ch);
239         if(firstCharDir==U_CHAR_DIRECTION_COUNT){
240             firstCharDir = direction;
241         }
242         if(direction == U_LEFT_TO_RIGHT){
243             leftToRight = TRUE;
244         }
245         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
246             rightToLeft = TRUE;
247         }
248     }
249 
250     // satisfy 2
251     if( leftToRight == TRUE && rightToLeft == TRUE){
252         status = U_IDNA_CHECK_BIDI_ERROR;
253         b1Len = 0;
254         goto CLEANUP;
255     }
256 
257     //satisfy 3
258     if( rightToLeft == TRUE &&
259         !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
260           (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
261        ){
262         status = U_IDNA_CHECK_BIDI_ERROR;
263         return FALSE;
264     }
265 
266     if(b1Len <= destCapacity){
267         uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
268     }
269 
270 CLEANUP:
271     return u_terminateUChars(dest, destCapacity, b1Len, &status);
272 }
273 
isLabelSeparator(UChar32 ch,UErrorCode & status)274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
275     // check error status
276     if(U_FAILURE(status)){
277         return FALSE;
278     }
279 
280     return labelSeparatorSet.contains(ch);
281 }
282 
283 #endif /* #if !UCONFIG_NO_IDNA */
284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
285