1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "formatted_string_builder.h"
9 #include "putilimp.h"
10 #include "unicode/ustring.h"
11 #include "unicode/utf16.h"
12 #include "unicode/unum.h" // for UNumberFormatFields literals
13
14 namespace {
15
16 // A version of uprv_memcpy that checks for length 0.
17 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
19 if (len > 0) {
20 uprv_memcpy(dest, src, len);
21 }
22 }
23
24 // A version of uprv_memmove that checks for length 0.
25 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)26 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
27 if (len > 0) {
28 uprv_memmove(dest, src, len);
29 }
30 }
31
32 } // namespace
33
34
35 U_NAMESPACE_BEGIN
36
FormattedStringBuilder()37 FormattedStringBuilder::FormattedStringBuilder() {
38 #if U_DEBUG
39 // Initializing the memory to non-zero helps catch some bugs that involve
40 // reading from an improperly terminated string.
41 for (int32_t i=0; i<getCapacity(); i++) {
42 getCharPtr()[i] = 1;
43 }
44 #endif
45 }
46
~FormattedStringBuilder()47 FormattedStringBuilder::~FormattedStringBuilder() {
48 if (fUsingHeap) {
49 uprv_free(fChars.heap.ptr);
50 uprv_free(fFields.heap.ptr);
51 }
52 }
53
// Copy constructor: all of the copying work is delegated to the copy
// assignment operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
57
// Copy assignment.
//
// Any heap buffers owned by this builder are released first. If the source's
// capacity exceeds DEFAULT_CAPACITY, fresh heap buffers of the same capacity
// are allocated. The full capacity of both the char and field arrays is then
// copied, along with the zero offset and the length.
//
// No UErrorCode is available here, so an allocation failure is not reported:
// this builder is reset to a default-constructed one instead.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Assigning an object to itself must leave it untouched.
    if (&other == this) {
        return *this;
    }

    // Drop whatever heap storage we currently own.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t otherCapacity = other.getCapacity();
    if (otherCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        auto* charBuffer = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * otherCapacity));
        auto* fieldBuffer = static_cast<Field*>(uprv_malloc(sizeof(Field) * otherCapacity));
        if (charBuffer == nullptr || fieldBuffer == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder(); // can't fail
            return *this;
        }

        fChars.heap.ptr = charBuffer;
        fChars.heap.capacity = otherCapacity;
        fFields.heap.ptr = fieldBuffer;
        fFields.heap.capacity = otherCapacity;
        fUsingHeap = true;
    }

    // Copy the complete buffers (not just the used window).
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * otherCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * otherCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
99
length() const100 int32_t FormattedStringBuilder::length() const {
101 return fLength;
102 }
103
codePointCount() const104 int32_t FormattedStringBuilder::codePointCount() const {
105 return u_countChar32(getCharPtr() + fZero, fLength);
106 }
107
getFirstCodePoint() const108 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
109 if (fLength == 0) {
110 return -1;
111 }
112 UChar32 cp;
113 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
114 return cp;
115 }
116
getLastCodePoint() const117 UChar32 FormattedStringBuilder::getLastCodePoint() const {
118 if (fLength == 0) {
119 return -1;
120 }
121 int32_t offset = fLength;
122 U16_BACK_1(getCharPtr() + fZero, 0, offset);
123 UChar32 cp;
124 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
125 return cp;
126 }
127
codePointAt(int32_t index) const128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
129 UChar32 cp;
130 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
131 return cp;
132 }
133
codePointBefore(int32_t index) const134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
135 int32_t offset = index;
136 U16_BACK_1(getCharPtr() + fZero, 0, offset);
137 UChar32 cp;
138 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
139 return cp;
140 }
141
clear()142 FormattedStringBuilder &FormattedStringBuilder::clear() {
143 // TODO: Reset the heap here?
144 fZero = getCapacity() / 2;
145 fLength = 0;
146 return *this;
147 }
148
149 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
151 int32_t count = U16_LENGTH(codePoint);
152 int32_t position = prepareForInsert(index, count, status);
153 if (U_FAILURE(status)) {
154 return count;
155 }
156 auto* charPtr = getCharPtr();
157 auto* fieldPtr = getFieldPtr();
158 if (count == 1) {
159 charPtr[position] = static_cast<char16_t>(codePoint);
160 fieldPtr[position] = field;
161 } else {
162 charPtr[position] = U16_LEAD(codePoint);
163 charPtr[position + 1] = U16_TRAIL(codePoint);
164 fieldPtr[position] = fieldPtr[position + 1] = field;
165 }
166 return count;
167 }
168
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)169 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
170 UErrorCode &status) {
171 if (unistr.length() == 0) {
172 // Nothing to insert.
173 return 0;
174 } else if (unistr.length() == 1) {
175 // Fast path: insert using insertCodePoint.
176 return insertCodePoint(index, unistr.charAt(0), field, status);
177 } else {
178 return insert(index, unistr, 0, unistr.length(), field, status);
179 }
180 }
181
182 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)183 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
184 Field field, UErrorCode &status) {
185 int32_t count = end - start;
186 int32_t position = prepareForInsert(index, count, status);
187 if (U_FAILURE(status)) {
188 return count;
189 }
190 for (int32_t i = 0; i < count; i++) {
191 getCharPtr()[position + i] = unistr.charAt(start + i);
192 getFieldPtr()[position + i] = field;
193 }
194 return count;
195 }
196
197 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)198 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
199 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
200 int32_t thisLength = endThis - startThis;
201 int32_t otherLength = endOther - startOther;
202 int32_t count = otherLength - thisLength;
203 if (U_FAILURE(status)) {
204 return count;
205 }
206 int32_t position;
207 if (count > 0) {
208 // Overall, chars need to be added.
209 position = prepareForInsert(startThis, count, status);
210 } else {
211 // Overall, chars need to be removed or kept the same.
212 position = remove(startThis, -count);
213 }
214 if (U_FAILURE(status)) {
215 return count;
216 }
217 for (int32_t i = 0; i < otherLength; i++) {
218 getCharPtr()[position + i] = unistr.charAt(startOther + i);
219 getFieldPtr()[position + i] = field;
220 }
221 return count;
222 }
223
append(const FormattedStringBuilder & other,UErrorCode & status)224 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
225 return insert(fLength, other, status);
226 }
227
228 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)229 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
230 if (U_FAILURE(status)) {
231 return 0;
232 }
233 if (this == &other) {
234 status = U_ILLEGAL_ARGUMENT_ERROR;
235 return 0;
236 }
237 int32_t count = other.fLength;
238 if (count == 0) {
239 // Nothing to insert.
240 return 0;
241 }
242 int32_t position = prepareForInsert(index, count, status);
243 if (U_FAILURE(status)) {
244 return count;
245 }
246 for (int32_t i = 0; i < count; i++) {
247 getCharPtr()[position + i] = other.charAt(i);
248 getFieldPtr()[position + i] = other.fieldAt(i);
249 }
250 return count;
251 }
252
writeTerminator(UErrorCode & status)253 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
254 int32_t position = prepareForInsert(fLength, 1, status);
255 if (U_FAILURE(status)) {
256 return;
257 }
258 getCharPtr()[position] = 0;
259 getFieldPtr()[position] = kUndefinedField;
260 fLength--;
261 }
262
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)263 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
264 U_ASSERT(index >= 0);
265 U_ASSERT(index <= fLength);
266 U_ASSERT(count >= 0);
267 U_ASSERT(fZero >= 0);
268 U_ASSERT(fLength >= 0);
269 U_ASSERT(getCapacity() - fZero >= fLength);
270 if (U_FAILURE(status)) {
271 return count;
272 }
273 if (index == 0 && fZero - count >= 0) {
274 // Append to start
275 fZero -= count;
276 fLength += count;
277 return fZero;
278 } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
279 // Append to end
280 fLength += count;
281 return fZero + fLength - count;
282 } else {
283 // Move chars around and/or allocate more space
284 return prepareForInsertHelper(index, count, status);
285 }
286 }
287
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)288 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
289 int32_t oldCapacity = getCapacity();
290 int32_t oldZero = fZero;
291 char16_t *oldChars = getCharPtr();
292 Field *oldFields = getFieldPtr();
293 int32_t newLength;
294 if (uprv_add32_overflow(fLength, count, &newLength)) {
295 status = U_INPUT_TOO_LONG_ERROR;
296 return -1;
297 }
298 int32_t newZero;
299 if (newLength > oldCapacity) {
300 if (newLength > INT32_MAX / 2) {
301 // We do not support more than 1G char16_t in this code because
302 // dealing with >2G *bytes* can cause subtle bugs.
303 status = U_INPUT_TOO_LONG_ERROR;
304 return -1;
305 }
306 // Keep newCapacity also to at most 1G char16_t.
307 int32_t newCapacity = newLength * 2;
308 newZero = (newCapacity - newLength) / 2;
309
310 // C++ note: malloc appears in two places: here and in the assignment operator.
311 auto* newChars =
312 static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
313 auto* newFields =
314 static_cast<Field*>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
315 if (newChars == nullptr || newFields == nullptr) {
316 uprv_free(newChars);
317 uprv_free(newFields);
318 status = U_MEMORY_ALLOCATION_ERROR;
319 return -1;
320 }
321
322 // First copy the prefix and then the suffix, leaving room for the new chars that the
323 // caller wants to insert.
324 // C++ note: memcpy is OK because the src and dest do not overlap.
325 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
326 uprv_memcpy2(newChars + newZero + index + count,
327 oldChars + oldZero + index,
328 sizeof(char16_t) * (fLength - index));
329 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
330 uprv_memcpy2(newFields + newZero + index + count,
331 oldFields + oldZero + index,
332 sizeof(Field) * (fLength - index));
333
334 if (fUsingHeap) {
335 uprv_free(oldChars);
336 uprv_free(oldFields);
337 }
338 fUsingHeap = true;
339 fChars.heap.ptr = newChars;
340 fChars.heap.capacity = newCapacity;
341 fFields.heap.ptr = newFields;
342 fFields.heap.capacity = newCapacity;
343 } else {
344 newZero = (oldCapacity - newLength) / 2;
345
346 // C++ note: memmove is required because src and dest may overlap.
347 // First copy the entire string to the location of the prefix, and then move the suffix
348 // to make room for the new chars that the caller wants to insert.
349 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
350 uprv_memmove2(oldChars + newZero + index + count,
351 oldChars + newZero + index,
352 sizeof(char16_t) * (fLength - index));
353 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
354 uprv_memmove2(oldFields + newZero + index + count,
355 oldFields + newZero + index,
356 sizeof(Field) * (fLength - index));
357 }
358 fZero = newZero;
359 fLength = newLength;
360 return fZero + index;
361 }
362
remove(int32_t index,int32_t count)363 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
364 U_ASSERT(0 <= index);
365 U_ASSERT(index <= fLength);
366 U_ASSERT(count <= (fLength - index));
367 U_ASSERT(index <= getCapacity() - fZero);
368
369 int32_t position = index + fZero;
370 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
371 uprv_memmove2(getCharPtr() + position,
372 getCharPtr() + position + count,
373 sizeof(char16_t) * (fLength - index - count));
374 uprv_memmove2(getFieldPtr() + position,
375 getFieldPtr() + position + count,
376 sizeof(Field) * (fLength - index - count));
377 fLength -= count;
378 return position;
379 }
380
toUnicodeString() const381 UnicodeString FormattedStringBuilder::toUnicodeString() const {
382 return UnicodeString(getCharPtr() + fZero, fLength);
383 }
384
toTempUnicodeString() const385 UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
386 // Readonly-alias constructor:
387 return UnicodeString(false, getCharPtr() + fZero, fLength);
388 }
389
toDebugString() const390 UnicodeString FormattedStringBuilder::toDebugString() const {
391 UnicodeString sb;
392 sb.append(u"<FormattedStringBuilder [", -1);
393 sb.append(toUnicodeString());
394 sb.append(u"] [", -1);
395 for (int i = 0; i < fLength; i++) {
396 if (fieldAt(i) == kUndefinedField) {
397 sb.append(u'n');
398 } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
399 char16_t c;
400 switch (fieldAt(i).getField()) {
401 case UNUM_SIGN_FIELD:
402 c = u'-';
403 break;
404 case UNUM_INTEGER_FIELD:
405 c = u'i';
406 break;
407 case UNUM_FRACTION_FIELD:
408 c = u'f';
409 break;
410 case UNUM_EXPONENT_FIELD:
411 c = u'e';
412 break;
413 case UNUM_EXPONENT_SIGN_FIELD:
414 c = u'+';
415 break;
416 case UNUM_EXPONENT_SYMBOL_FIELD:
417 c = u'E';
418 break;
419 case UNUM_DECIMAL_SEPARATOR_FIELD:
420 c = u'.';
421 break;
422 case UNUM_GROUPING_SEPARATOR_FIELD:
423 c = u',';
424 break;
425 case UNUM_PERCENT_FIELD:
426 c = u'%';
427 break;
428 case UNUM_PERMILL_FIELD:
429 c = u'‰';
430 break;
431 case UNUM_CURRENCY_FIELD:
432 c = u'$';
433 break;
434 default:
435 c = u'0' + fieldAt(i).getField();
436 break;
437 }
438 sb.append(c);
439 } else {
440 sb.append(u'0' + fieldAt(i).getCategory());
441 }
442 }
443 sb.append(u"]>", -1);
444 return sb;
445 }
446
chars() const447 const char16_t *FormattedStringBuilder::chars() const {
448 return getCharPtr() + fZero;
449 }
450
contentEquals(const FormattedStringBuilder & other) const451 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
452 if (fLength != other.fLength) {
453 return false;
454 }
455 for (int32_t i = 0; i < fLength; i++) {
456 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
457 return false;
458 }
459 }
460 return true;
461 }
462
containsField(Field field) const463 bool FormattedStringBuilder::containsField(Field field) const {
464 for (int32_t i = 0; i < fLength; i++) {
465 if (field == fieldAt(i)) {
466 return true;
467 }
468 }
469 return false;
470 }
471
472 U_NAMESPACE_END
473
474 #endif /* #if !UCONFIG_NO_FORMATTING */
475