• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*
*   Copyright (C) 1999-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  unistr_case.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004aug19
*   created by: Markus W. Scherer
*
*   Case-mapping functions moved here from unistr.cpp
*/
18 
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/locid.h"
22 #include "cstring.h"
23 #include "cmemory.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unistr.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ubrk.h"
28 #include "ustr_imp.h"
29 #include "uhash.h"
30 
31 U_NAMESPACE_BEGIN
32 
33 //========================================
34 // Read-only implementation
35 //========================================
36 
37 int8_t
doCaseCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options) const38 UnicodeString::doCaseCompare(int32_t start,
39                              int32_t length,
40                              const UChar *srcChars,
41                              int32_t srcStart,
42                              int32_t srcLength,
43                              uint32_t options) const
44 {
45   // compare illegal string values
46   // treat const UChar *srcChars==NULL as an empty string
47   if(isBogus()) {
48     return -1;
49   }
50 
51   // pin indices to legal values
52   pinIndices(start, length);
53 
54   if(srcChars == NULL) {
55     srcStart = srcLength = 0;
56   }
57 
58   // get the correct pointer
59   const UChar *chars = getArrayStart();
60 
61   chars += start;
62   srcChars += srcStart;
63 
64   if(chars != srcChars) {
65     UErrorCode errorCode=U_ZERO_ERROR;
66     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
67                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
68     if(result!=0) {
69       return (int8_t)(result >> 24 | 1);
70     }
71   } else {
72     // get the srcLength if necessary
73     if(srcLength < 0) {
74       srcLength = u_strlen(srcChars + srcStart);
75     }
76     if(length != srcLength) {
77       return (int8_t)((length - srcLength) >> 24 | 1);
78     }
79   }
80   return 0;
81 }
82 
83 //========================================
84 // Write implementation
85 //========================================
86 
/*
 * Implement argument checking and buffer handling
 * for string case mapping as a common function.
 */
91 
92 UnicodeString &
caseMap(BreakIterator * titleIter,const char * locale,uint32_t options,int32_t toWhichCase)93 UnicodeString::caseMap(BreakIterator *titleIter,
94                        const char *locale,
95                        uint32_t options,
96                        int32_t toWhichCase) {
97   if(isEmpty() || !isWritable()) {
98     // nothing to do
99     return *this;
100   }
101 
102   const UCaseProps *csp=ucase_getSingleton();
103 
104   // We need to allocate a new buffer for the internal string case mapping function.
105   // This is very similar to how doReplace() keeps the old array pointer
106   // and deletes the old array itself after it is done.
107   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
108   UChar oldStackBuffer[US_STACKBUF_SIZE];
109   UChar *oldArray;
110   int32_t oldLength;
111 
112   if(fFlags&kUsingStackBuffer) {
113     // copy the stack buffer contents because it will be overwritten
114     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
115     oldArray = oldStackBuffer;
116     oldLength = fShortLength;
117   } else {
118     oldArray = getArrayStart();
119     oldLength = length();
120   }
121 
122   int32_t capacity;
123   if(oldLength <= US_STACKBUF_SIZE) {
124     capacity = US_STACKBUF_SIZE;
125   } else {
126     capacity = oldLength + 20;
127   }
128   int32_t *bufferToDelete = 0;
129   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
130     return *this;
131   }
132 
133   // Case-map, and if the result is too long, then reallocate and repeat.
134   UErrorCode errorCode;
135   int32_t newLength;
136   do {
137     errorCode = U_ZERO_ERROR;
138     if(toWhichCase==TO_LOWER) {
139       newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
140                                oldArray, oldLength,
141                                locale, &errorCode);
142     } else if(toWhichCase==TO_UPPER) {
143       newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
144                                oldArray, oldLength,
145                                locale, &errorCode);
146     } else if(toWhichCase==TO_TITLE) {
147 #if UCONFIG_NO_BREAK_ITERATION
148         errorCode=U_UNSUPPORTED_ERROR;
149 #else
150       newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
151                                oldArray, oldLength,
152                                (UBreakIterator *)titleIter, locale, options, &errorCode);
153 #endif
154     } else {
155       newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
156                                 oldArray, oldLength,
157                                 options,
158                                 &errorCode);
159     }
160     setLength(newLength);
161   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
162 
163   if (bufferToDelete) {
164     uprv_free(bufferToDelete);
165   }
166   if(U_FAILURE(errorCode)) {
167     setToBogus();
168   }
169   return *this;
170 }
171 
172 UnicodeString &
toLower()173 UnicodeString::toLower() {
174   return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
175 }
176 
177 UnicodeString &
toLower(const Locale & locale)178 UnicodeString::toLower(const Locale &locale) {
179   return caseMap(0, locale.getName(), 0, TO_LOWER);
180 }
181 
182 UnicodeString &
toUpper()183 UnicodeString::toUpper() {
184   return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
185 }
186 
187 UnicodeString &
toUpper(const Locale & locale)188 UnicodeString::toUpper(const Locale &locale) {
189   return caseMap(0, locale.getName(), 0, TO_UPPER);
190 }
191 
192 #if !UCONFIG_NO_BREAK_ITERATION
193 
194 UnicodeString &
toTitle(BreakIterator * titleIter)195 UnicodeString::toTitle(BreakIterator *titleIter) {
196   return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
197 }
198 
199 UnicodeString &
toTitle(BreakIterator * titleIter,const Locale & locale)200 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
201   return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
202 }
203 
204 UnicodeString &
toTitle(BreakIterator * titleIter,const Locale & locale,uint32_t options)205 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
206   return caseMap(titleIter, locale.getName(), options, TO_TITLE);
207 }
208 
209 #endif
210 
211 UnicodeString &
foldCase(uint32_t options)212 UnicodeString::foldCase(uint32_t options) {
213     /* The Locale parameter isn't used. Use "" instead. */
214     return caseMap(0, "", options, FOLD_CASE);
215 }
216 
217 U_NAMESPACE_END
218 
219 // Defined here to reduce dependencies on break iterator
220 U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UHashTok key)221 uhash_hashCaselessUnicodeString(const UHashTok key) {
222     U_NAMESPACE_USE
223     const UnicodeString *str = (const UnicodeString*) key.pointer;
224     if (str == NULL) {
225         return 0;
226     }
227     // Inefficient; a better way would be to have a hash function in
228     // UnicodeString that does case folding on the fly.
229     UnicodeString copy(*str);
230     return copy.foldCase().hashCode();
231 }
232 
233 // Defined here to reduce dependencies on break iterator
234 U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UHashTok key1,const UHashTok key2)235 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
236     U_NAMESPACE_USE
237     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
238     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
239     if (str1 == str2) {
240         return TRUE;
241     }
242     if (str1 == NULL || str2 == NULL) {
243         return FALSE;
244     }
245     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
246 }
247 
248