1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unistr_case.cpp
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:2
14 *
15 * created on: 2004aug19
16 * created by: Markus W. Scherer
17 *
18 * Case-mapping functions moved here from unistr.cpp
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
27 #include "unicode/uchar.h"
28 #include "uelement.h"
29 #include "ustr_imp.h"
30
31 U_NAMESPACE_BEGIN
32
33 //========================================
34 // Read-only implementation
35 //========================================
36
37 int8_t
doCaseCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options) const38 UnicodeString::doCaseCompare(int32_t start,
39 int32_t length,
40 const UChar *srcChars,
41 int32_t srcStart,
42 int32_t srcLength,
43 uint32_t options) const
44 {
45 // compare illegal string values
46 // treat const UChar *srcChars==NULL as an empty string
47 if(isBogus()) {
48 return -1;
49 }
50
51 // pin indices to legal values
52 pinIndices(start, length);
53
54 if(srcChars == NULL) {
55 srcStart = srcLength = 0;
56 }
57
58 // get the correct pointer
59 const UChar *chars = getArrayStart();
60
61 chars += start;
62 if(srcStart!=0) {
63 srcChars += srcStart;
64 }
65
66 if(chars != srcChars) {
67 UErrorCode errorCode=U_ZERO_ERROR;
68 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
69 options|U_COMPARE_IGNORE_CASE, &errorCode);
70 if(result!=0) {
71 return (int8_t)(result >> 24 | 1);
72 }
73 } else {
74 // get the srcLength if necessary
75 if(srcLength < 0) {
76 srcLength = u_strlen(srcChars + srcStart);
77 }
78 if(length != srcLength) {
79 return (int8_t)((length - srcLength) >> 24 | 1);
80 }
81 }
82 return 0;
83 }
84
85 //========================================
86 // Write implementation
87 //========================================
88
89 UnicodeString &
caseMap(const UCaseMap * csm,UStringCaseMapper * stringCaseMapper)90 UnicodeString::caseMap(const UCaseMap *csm,
91 UStringCaseMapper *stringCaseMapper) {
92 if(isEmpty() || !isWritable()) {
93 // nothing to do
94 return *this;
95 }
96
97 // We need to allocate a new buffer for the internal string case mapping function.
98 // This is very similar to how doReplace() keeps the old array pointer
99 // and deletes the old array itself after it is done.
100 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
101 UChar oldStackBuffer[US_STACKBUF_SIZE];
102 UChar *oldArray;
103 int32_t oldLength;
104
105 if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
106 // copy the stack buffer contents because it will be overwritten
107 oldArray = oldStackBuffer;
108 oldLength = getShortLength();
109 u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
110 } else {
111 oldArray = getArrayStart();
112 oldLength = length();
113 }
114
115 int32_t capacity;
116 if(oldLength <= US_STACKBUF_SIZE) {
117 capacity = US_STACKBUF_SIZE;
118 } else {
119 capacity = oldLength + 20;
120 }
121 int32_t *bufferToDelete = 0;
122 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
123 return *this;
124 }
125
126 // Case-map, and if the result is too long, then reallocate and repeat.
127 UErrorCode errorCode;
128 int32_t newLength;
129 do {
130 errorCode = U_ZERO_ERROR;
131 newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
132 oldArray, oldLength, &errorCode);
133 setLength(newLength);
134 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
135
136 if (bufferToDelete) {
137 uprv_free(bufferToDelete);
138 }
139 if(U_FAILURE(errorCode)) {
140 setToBogus();
141 }
142 return *this;
143 }
144
145 UnicodeString &
foldCase(uint32_t options)146 UnicodeString::foldCase(uint32_t options) {
147 UCaseMap csm=UCASEMAP_INITIALIZER;
148 csm.csp=ucase_getSingleton();
149 csm.options=options;
150 return caseMap(&csm, ustrcase_internalFold);
151 }
152
153 U_NAMESPACE_END
154
155 // Defined here to reduce dependencies on break iterator
156 U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key)157 uhash_hashCaselessUnicodeString(const UElement key) {
158 U_NAMESPACE_USE
159 const UnicodeString *str = (const UnicodeString*) key.pointer;
160 if (str == NULL) {
161 return 0;
162 }
163 // Inefficient; a better way would be to have a hash function in
164 // UnicodeString that does case folding on the fly.
165 UnicodeString copy(*str);
166 return copy.foldCase().hashCode();
167 }
168
169 // Defined here to reduce dependencies on break iterator
170 U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UElement key1,const UElement key2)171 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
172 U_NAMESPACE_USE
173 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
174 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
175 if (str1 == str2) {
176 return TRUE;
177 }
178 if (str1 == NULL || str2 == NULL) {
179 return FALSE;
180 }
181 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
182 }
183