• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1999-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  unistr_cnv.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:2
12 *
13 *   created on: 2004aug19
14 *   created by: Markus W. Scherer
15 *
16 *   Character conversion functions moved here from unistr.cpp
17 */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_CONVERSION
22 
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/ucnv.h"
29 #include "putilimp.h"
30 #include "ustr_cnv.h"
31 #include "ustr_imp.h"
32 
33 U_NAMESPACE_BEGIN
34 
35 //========================================
36 // Constructors
37 //========================================
38 
UnicodeString(const char * codepageData,const char * codepage)39 UnicodeString::UnicodeString(const char *codepageData,
40                              const char *codepage)
41   : fLength(0),
42     fCapacity(US_STACKBUF_SIZE),
43     fArray(fStackBuffer),
44     fFlags(kShortString)
45 {
46     if(codepageData != 0) {
47         doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
48     }
49 }
50 
51 
UnicodeString(const char * codepageData,int32_t dataLength,const char * codepage)52 UnicodeString::UnicodeString(const char *codepageData,
53                              int32_t dataLength,
54                              const char *codepage)
55   : fLength(0),
56     fCapacity(US_STACKBUF_SIZE),
57     fArray(fStackBuffer),
58     fFlags(kShortString)
59 {
60     if(codepageData != 0) {
61         doCodepageCreate(codepageData, dataLength, codepage);
62     }
63 }
64 
UnicodeString(const char * src,int32_t srcLength,UConverter * cnv,UErrorCode & errorCode)65 UnicodeString::UnicodeString(const char *src, int32_t srcLength,
66                              UConverter *cnv,
67                              UErrorCode &errorCode)
68   : fLength(0),
69     fCapacity(US_STACKBUF_SIZE),
70     fArray(fStackBuffer),
71     fFlags(kShortString)
72 {
73     if(U_SUCCESS(errorCode)) {
74         // check arguments
75         if(src==NULL) {
76             // treat as an empty string, do nothing more
77         } else if(srcLength<-1) {
78             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
79         } else {
80             // get input length
81             if(srcLength==-1) {
82                 srcLength=(int32_t)uprv_strlen(src);
83             }
84             if(srcLength>0) {
85                 if(cnv!=0) {
86                     // use the provided converter
87                     ucnv_resetToUnicode(cnv);
88                     doCodepageCreate(src, srcLength, cnv, errorCode);
89                 } else {
90                     // use the default converter
91                     cnv=u_getDefaultConverter(&errorCode);
92                     doCodepageCreate(src, srcLength, cnv, errorCode);
93                     u_releaseDefaultConverter(cnv);
94                 }
95             }
96         }
97 
98         if(U_FAILURE(errorCode)) {
99             setToBogus();
100         }
101     }
102 }
103 
104 //========================================
105 // Codeset conversion
106 //========================================
107 int32_t
extract(int32_t start,int32_t length,char * target,uint32_t dstSize,const char * codepage) const108 UnicodeString::extract(int32_t start,
109                        int32_t length,
110                        char *target,
111                        uint32_t dstSize,
112                        const char *codepage) const
113 {
114     // if the arguments are illegal, then do nothing
115     if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
116         return 0;
117     }
118 
119     // pin the indices to legal values
120     pinIndices(start, length);
121 
122     // create the converter
123     UConverter *converter;
124     UErrorCode status = U_ZERO_ERROR;
125 
126     // just write the NUL if the string length is 0
127     if(length == 0) {
128         if(dstSize >= 0x80000000) {
129             // careful: dstSize is unsigned! (0xffffffff means "unlimited")
130             // make sure that the NUL-termination works (takes int32_t)
131             dstSize=0x7fffffff;
132         }
133         return u_terminateChars(target, dstSize, 0, &status);
134     }
135 
136     // if the codepage is the default, use our cache
137     // if it is an empty string, then use the "invariant character" conversion
138     if (codepage == 0) {
139         converter = u_getDefaultConverter(&status);
140     } else if (*codepage == 0) {
141         // use the "invariant characters" conversion
142         int32_t destLength;
143         // careful: dstSize is unsigned! (0xffffffff means "unlimited")
144         if(dstSize >= 0x80000000) {
145             destLength = length;
146             // make sure that the NUL-termination works (takes int32_t)
147             dstSize=0x7fffffff;
148         } else if(length <= (int32_t)dstSize) {
149             destLength = length;
150         } else {
151             destLength = (int32_t)dstSize;
152         }
153         u_UCharsToChars(getArrayStart() + start, target, destLength);
154         return u_terminateChars(target, (int32_t)dstSize, length, &status);
155     } else {
156         converter = ucnv_open(codepage, &status);
157     }
158 
159     length = doExtract(start, length, target, (int32_t)dstSize, converter, status);
160 
161     // close the converter
162     if (codepage == 0) {
163         u_releaseDefaultConverter(converter);
164     } else {
165         ucnv_close(converter);
166     }
167 
168     return length;
169 }
170 
171 int32_t
extract(char * dest,int32_t destCapacity,UConverter * cnv,UErrorCode & errorCode) const172 UnicodeString::extract(char *dest, int32_t destCapacity,
173                        UConverter *cnv,
174                        UErrorCode &errorCode) const
175 {
176     if(U_FAILURE(errorCode)) {
177         return 0;
178     }
179 
180     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
181         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
182         return 0;
183     }
184 
185     // nothing to do?
186     if(fLength<=0) {
187         return u_terminateChars(dest, destCapacity, 0, &errorCode);
188     }
189 
190     // get the converter
191     UBool isDefaultConverter;
192     if(cnv==0) {
193         isDefaultConverter=TRUE;
194         cnv=u_getDefaultConverter(&errorCode);
195         if(U_FAILURE(errorCode)) {
196             return 0;
197         }
198     } else {
199         isDefaultConverter=FALSE;
200         ucnv_resetFromUnicode(cnv);
201     }
202 
203     // convert
204     int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode);
205 
206     // release the converter
207     if(isDefaultConverter) {
208         u_releaseDefaultConverter(cnv);
209     }
210 
211     return length;
212 }
213 
214 int32_t
doExtract(int32_t start,int32_t length,char * dest,int32_t destCapacity,UConverter * cnv,UErrorCode & errorCode) const215 UnicodeString::doExtract(int32_t start, int32_t length,
216                          char *dest, int32_t destCapacity,
217                          UConverter *cnv,
218                          UErrorCode &errorCode) const
219 {
220     if(U_FAILURE(errorCode)) {
221         if(destCapacity!=0) {
222             *dest=0;
223         }
224         return 0;
225     }
226 
227     const UChar *src=fArray+start, *srcLimit=src+length;
228     char *originalDest=dest;
229     const char *destLimit;
230 
231     if(destCapacity==0) {
232         destLimit=dest=0;
233     } else if(destCapacity==-1) {
234         // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
235         destLimit=(char*)U_MAX_PTR(dest);
236         // for NUL-termination, translate into highest int32_t
237         destCapacity=0x7fffffff;
238     } else {
239         destLimit=dest+destCapacity;
240     }
241 
242     // perform the conversion
243     ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
244     length=(int32_t)(dest-originalDest);
245 
246     // if an overflow occurs, then get the preflighting length
247     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
248         char buffer[1024];
249 
250         destLimit=buffer+sizeof(buffer);
251         do {
252             dest=buffer;
253             errorCode=U_ZERO_ERROR;
254             ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
255             length+=(int32_t)(dest-buffer);
256         } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
257     }
258 
259     return u_terminateChars(originalDest, destCapacity, length, &errorCode);
260 }
261 
262 void
doCodepageCreate(const char * codepageData,int32_t dataLength,const char * codepage)263 UnicodeString::doCodepageCreate(const char *codepageData,
264                                 int32_t dataLength,
265                                 const char *codepage)
266 {
267     // if there's nothing to convert, do nothing
268     if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
269         return;
270     }
271     if(dataLength == -1) {
272         dataLength = (int32_t)uprv_strlen(codepageData);
273     }
274 
275     UErrorCode status = U_ZERO_ERROR;
276 
277     // create the converter
278     // if the codepage is the default, use our cache
279     // if it is an empty string, then use the "invariant character" conversion
280     UConverter *converter = (codepage == 0 ?
281                              u_getDefaultConverter(&status) :
282                              *codepage == 0 ?
283                                0 :
284                                ucnv_open(codepage, &status));
285 
286     // if we failed, set the appropriate flags and return
287     if(U_FAILURE(status)) {
288         setToBogus();
289         return;
290     }
291 
292     // perform the conversion
293     if(converter == 0) {
294         // use the "invariant characters" conversion
295         if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
296             u_charsToUChars(codepageData, getArrayStart(), dataLength);
297             fLength = dataLength;
298         } else {
299             setToBogus();
300         }
301         return;
302     }
303 
304     // convert using the real converter
305     doCodepageCreate(codepageData, dataLength, converter, status);
306     if(U_FAILURE(status)) {
307         setToBogus();
308     }
309 
310     // close the converter
311     if(codepage == 0) {
312         u_releaseDefaultConverter(converter);
313     } else {
314         ucnv_close(converter);
315     }
316 }
317 
318 void
doCodepageCreate(const char * codepageData,int32_t dataLength,UConverter * converter,UErrorCode & status)319 UnicodeString::doCodepageCreate(const char *codepageData,
320                                 int32_t dataLength,
321                                 UConverter *converter,
322                                 UErrorCode &status)
323 {
324     if(U_FAILURE(status)) {
325         return;
326     }
327 
328     // set up the conversion parameters
329     const char *mySource     = codepageData;
330     const char *mySourceEnd  = mySource + dataLength;
331     UChar *myTarget;
332 
333     // estimate the size needed:
334     // 1.25 UChar's per source byte should cover most cases
335     int32_t arraySize = dataLength + (dataLength >> 2);
336 
337     // we do not care about the current contents
338     UBool doCopyArray = FALSE;
339     for(;;) {
340         if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
341             setToBogus();
342             break;
343         }
344 
345         // perform the conversion
346         myTarget = fArray + fLength;
347         ucnv_toUnicode(converter, &myTarget,  fArray + fCapacity,
348             &mySource, mySourceEnd, 0, TRUE, &status);
349 
350         // update the conversion parameters
351         fLength = (int32_t)(myTarget - fArray);
352 
353         // allocate more space and copy data, if needed
354         if(status == U_BUFFER_OVERFLOW_ERROR) {
355             // reset the error code
356             status = U_ZERO_ERROR;
357 
358             // keep the previous conversion results
359             doCopyArray = TRUE;
360 
361             // estimate the new size needed, larger than before
362             // try 2 UChar's per remaining source byte
363             arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource));
364         } else {
365             break;
366         }
367     }
368 }
369 
370 U_NAMESPACE_END
371 
372 #endif
373