1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "formatted_string_builder.h"
9 #include "putilimp.h"
10 #include "unicode/ustring.h"
11 #include "unicode/utf16.h"
12 #include "unicode/unum.h" // for UNumberFormatFields literals
13
14 namespace {
15
16 // A version of uprv_memcpy that checks for length 0.
17 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
19 if (len > 0) {
20 uprv_memcpy(dest, src, len);
21 }
22 }
23
24 // A version of uprv_memmove that checks for length 0.
25 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)26 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
27 if (len > 0) {
28 uprv_memmove(dest, src, len);
29 }
30 }
31
32 } // namespace
33
34
35 U_NAMESPACE_BEGIN
36
FormattedStringBuilder()37 FormattedStringBuilder::FormattedStringBuilder() {
38 #if U_DEBUG
39 // Initializing the memory to non-zero helps catch some bugs that involve
40 // reading from an improperly terminated string.
41 for (int32_t i=0; i<getCapacity(); i++) {
42 getCharPtr()[i] = 1;
43 }
44 #endif
45 }
46
~FormattedStringBuilder()47 FormattedStringBuilder::~FormattedStringBuilder() {
48 if (fUsingHeap) {
49 uprv_free(fChars.heap.ptr);
50 uprv_free(fFields.heap.ptr);
51 }
52 }
53
// Copy constructor: delegates all of the work to the copy-assignment operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
57
// Copy assignment.
//
// Releases any heap buffers currently owned, allocates new heap buffers when the source
// capacity exceeds DEFAULT_CAPACITY, and then copies the source's full char and field
// buffers together with its zero offset and length.
//
// No UErrorCode is available here, so an allocation failure is not reported: the object
// is reset to a default-constructed state instead.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Self-assignment: nothing to do.
    if (this == &other) {
        return *this;
    }

    // Drop whatever heap storage this object currently owns.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t srcCapacity = other.getCapacity();
    if (srcCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *charBuffer = static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * srcCapacity));
        Field *fieldBuffer = static_cast<Field *>(uprv_malloc(sizeof(Field) * srcCapacity));
        if (charBuffer == nullptr || fieldBuffer == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        fChars.heap.ptr = charBuffer;
        fChars.heap.capacity = srcCapacity;
        fFields.heap.ptr = fieldBuffer;
        fFields.heap.capacity = srcCapacity;
        fUsingHeap = true;
    }

    // Copy the complete buffers (the whole capacity, not only the used range).
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * srcCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * srcCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
99
length() const100 int32_t FormattedStringBuilder::length() const {
101 return fLength;
102 }
103
codePointCount() const104 int32_t FormattedStringBuilder::codePointCount() const {
105 return u_countChar32(getCharPtr() + fZero, fLength);
106 }
107
getFirstCodePoint() const108 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
109 if (fLength == 0) {
110 return -1;
111 }
112 UChar32 cp;
113 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
114 return cp;
115 }
116
getLastCodePoint() const117 UChar32 FormattedStringBuilder::getLastCodePoint() const {
118 if (fLength == 0) {
119 return -1;
120 }
121 int32_t offset = fLength;
122 U16_BACK_1(getCharPtr() + fZero, 0, offset);
123 UChar32 cp;
124 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
125 return cp;
126 }
127
codePointAt(int32_t index) const128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
129 UChar32 cp;
130 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
131 return cp;
132 }
133
codePointBefore(int32_t index) const134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
135 int32_t offset = index;
136 U16_BACK_1(getCharPtr() + fZero, 0, offset);
137 UChar32 cp;
138 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
139 return cp;
140 }
141
clear()142 FormattedStringBuilder &FormattedStringBuilder::clear() {
143 // TODO: Reset the heap here?
144 fZero = getCapacity() / 2;
145 fLength = 0;
146 return *this;
147 }
148
149 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
151 int32_t count = U16_LENGTH(codePoint);
152 int32_t position = prepareForInsert(index, count, status);
153 if (U_FAILURE(status)) {
154 return count;
155 }
156 if (count == 1) {
157 getCharPtr()[position] = (char16_t) codePoint;
158 getFieldPtr()[position] = field;
159 } else {
160 getCharPtr()[position] = U16_LEAD(codePoint);
161 getCharPtr()[position + 1] = U16_TRAIL(codePoint);
162 getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
163 }
164 return count;
165 }
166
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)167 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
168 UErrorCode &status) {
169 if (unistr.length() == 0) {
170 // Nothing to insert.
171 return 0;
172 } else if (unistr.length() == 1) {
173 // Fast path: insert using insertCodePoint.
174 return insertCodePoint(index, unistr.charAt(0), field, status);
175 } else {
176 return insert(index, unistr, 0, unistr.length(), field, status);
177 }
178 }
179
180 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)181 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
182 Field field, UErrorCode &status) {
183 int32_t count = end - start;
184 int32_t position = prepareForInsert(index, count, status);
185 if (U_FAILURE(status)) {
186 return count;
187 }
188 for (int32_t i = 0; i < count; i++) {
189 getCharPtr()[position + i] = unistr.charAt(start + i);
190 getFieldPtr()[position + i] = field;
191 }
192 return count;
193 }
194
195 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)196 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
197 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
198 int32_t thisLength = endThis - startThis;
199 int32_t otherLength = endOther - startOther;
200 int32_t count = otherLength - thisLength;
201 if (U_FAILURE(status)) {
202 return count;
203 }
204 int32_t position;
205 if (count > 0) {
206 // Overall, chars need to be added.
207 position = prepareForInsert(startThis, count, status);
208 } else {
209 // Overall, chars need to be removed or kept the same.
210 position = remove(startThis, -count);
211 }
212 if (U_FAILURE(status)) {
213 return count;
214 }
215 for (int32_t i = 0; i < otherLength; i++) {
216 getCharPtr()[position + i] = unistr.charAt(startOther + i);
217 getFieldPtr()[position + i] = field;
218 }
219 return count;
220 }
221
append(const FormattedStringBuilder & other,UErrorCode & status)222 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
223 return insert(fLength, other, status);
224 }
225
226 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)227 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
228 if (U_FAILURE(status)) {
229 return 0;
230 }
231 if (this == &other) {
232 status = U_ILLEGAL_ARGUMENT_ERROR;
233 return 0;
234 }
235 int32_t count = other.fLength;
236 if (count == 0) {
237 // Nothing to insert.
238 return 0;
239 }
240 int32_t position = prepareForInsert(index, count, status);
241 if (U_FAILURE(status)) {
242 return count;
243 }
244 for (int32_t i = 0; i < count; i++) {
245 getCharPtr()[position + i] = other.charAt(i);
246 getFieldPtr()[position + i] = other.fieldAt(i);
247 }
248 return count;
249 }
250
writeTerminator(UErrorCode & status)251 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
252 int32_t position = prepareForInsert(fLength, 1, status);
253 if (U_FAILURE(status)) {
254 return;
255 }
256 getCharPtr()[position] = 0;
257 getFieldPtr()[position] = kUndefinedField;
258 fLength--;
259 }
260
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)261 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
262 U_ASSERT(index >= 0);
263 U_ASSERT(index <= fLength);
264 U_ASSERT(count >= 0);
265 U_ASSERT(fZero >= 0);
266 U_ASSERT(fLength >= 0);
267 U_ASSERT(getCapacity() - fZero >= fLength);
268 if (U_FAILURE(status)) {
269 return count;
270 }
271 if (index == 0 && fZero - count >= 0) {
272 // Append to start
273 fZero -= count;
274 fLength += count;
275 return fZero;
276 } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
277 // Append to end
278 fLength += count;
279 return fZero + fLength - count;
280 } else {
281 // Move chars around and/or allocate more space
282 return prepareForInsertHelper(index, count, status);
283 }
284 }
285
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)286 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
287 int32_t oldCapacity = getCapacity();
288 int32_t oldZero = fZero;
289 char16_t *oldChars = getCharPtr();
290 Field *oldFields = getFieldPtr();
291 int32_t newLength;
292 if (uprv_add32_overflow(fLength, count, &newLength)) {
293 status = U_INPUT_TOO_LONG_ERROR;
294 return -1;
295 }
296 int32_t newZero;
297 if (newLength > oldCapacity) {
298 if (newLength > INT32_MAX / 2) {
299 // We do not support more than 1G char16_t in this code because
300 // dealing with >2G *bytes* can cause subtle bugs.
301 status = U_INPUT_TOO_LONG_ERROR;
302 return -1;
303 }
304 // Keep newCapacity also to at most 1G char16_t.
305 int32_t newCapacity = newLength * 2;
306 newZero = (newCapacity - newLength) / 2;
307
308 // C++ note: malloc appears in two places: here and in the assignment operator.
309 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
310 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
311 if (newChars == nullptr || newFields == nullptr) {
312 uprv_free(newChars);
313 uprv_free(newFields);
314 status = U_MEMORY_ALLOCATION_ERROR;
315 return -1;
316 }
317
318 // First copy the prefix and then the suffix, leaving room for the new chars that the
319 // caller wants to insert.
320 // C++ note: memcpy is OK because the src and dest do not overlap.
321 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
322 uprv_memcpy2(newChars + newZero + index + count,
323 oldChars + oldZero + index,
324 sizeof(char16_t) * (fLength - index));
325 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
326 uprv_memcpy2(newFields + newZero + index + count,
327 oldFields + oldZero + index,
328 sizeof(Field) * (fLength - index));
329
330 if (fUsingHeap) {
331 uprv_free(oldChars);
332 uprv_free(oldFields);
333 }
334 fUsingHeap = true;
335 fChars.heap.ptr = newChars;
336 fChars.heap.capacity = newCapacity;
337 fFields.heap.ptr = newFields;
338 fFields.heap.capacity = newCapacity;
339 } else {
340 newZero = (oldCapacity - newLength) / 2;
341
342 // C++ note: memmove is required because src and dest may overlap.
343 // First copy the entire string to the location of the prefix, and then move the suffix
344 // to make room for the new chars that the caller wants to insert.
345 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
346 uprv_memmove2(oldChars + newZero + index + count,
347 oldChars + newZero + index,
348 sizeof(char16_t) * (fLength - index));
349 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
350 uprv_memmove2(oldFields + newZero + index + count,
351 oldFields + newZero + index,
352 sizeof(Field) * (fLength - index));
353 }
354 fZero = newZero;
355 fLength = newLength;
356 return fZero + index;
357 }
358
remove(int32_t index,int32_t count)359 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
360 U_ASSERT(0 <= index);
361 U_ASSERT(index <= fLength);
362 U_ASSERT(count <= (fLength - index));
363 U_ASSERT(index <= getCapacity() - fZero);
364
365 int32_t position = index + fZero;
366 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
367 uprv_memmove2(getCharPtr() + position,
368 getCharPtr() + position + count,
369 sizeof(char16_t) * (fLength - index - count));
370 uprv_memmove2(getFieldPtr() + position,
371 getFieldPtr() + position + count,
372 sizeof(Field) * (fLength - index - count));
373 fLength -= count;
374 return position;
375 }
376
toUnicodeString() const377 UnicodeString FormattedStringBuilder::toUnicodeString() const {
378 return UnicodeString(getCharPtr() + fZero, fLength);
379 }
380
toTempUnicodeString() const381 const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
382 // Readonly-alias constructor:
383 return UnicodeString(false, getCharPtr() + fZero, fLength);
384 }
385
toDebugString() const386 UnicodeString FormattedStringBuilder::toDebugString() const {
387 UnicodeString sb;
388 sb.append(u"<FormattedStringBuilder [", -1);
389 sb.append(toUnicodeString());
390 sb.append(u"] [", -1);
391 for (int i = 0; i < fLength; i++) {
392 if (fieldAt(i) == kUndefinedField) {
393 sb.append(u'n');
394 } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
395 char16_t c;
396 switch (fieldAt(i).getField()) {
397 case UNUM_SIGN_FIELD:
398 c = u'-';
399 break;
400 case UNUM_INTEGER_FIELD:
401 c = u'i';
402 break;
403 case UNUM_FRACTION_FIELD:
404 c = u'f';
405 break;
406 case UNUM_EXPONENT_FIELD:
407 c = u'e';
408 break;
409 case UNUM_EXPONENT_SIGN_FIELD:
410 c = u'+';
411 break;
412 case UNUM_EXPONENT_SYMBOL_FIELD:
413 c = u'E';
414 break;
415 case UNUM_DECIMAL_SEPARATOR_FIELD:
416 c = u'.';
417 break;
418 case UNUM_GROUPING_SEPARATOR_FIELD:
419 c = u',';
420 break;
421 case UNUM_PERCENT_FIELD:
422 c = u'%';
423 break;
424 case UNUM_PERMILL_FIELD:
425 c = u'‰';
426 break;
427 case UNUM_CURRENCY_FIELD:
428 c = u'$';
429 break;
430 default:
431 c = u'0' + fieldAt(i).getField();
432 break;
433 }
434 sb.append(c);
435 } else {
436 sb.append(u'0' + fieldAt(i).getCategory());
437 }
438 }
439 sb.append(u"]>", -1);
440 return sb;
441 }
442
chars() const443 const char16_t *FormattedStringBuilder::chars() const {
444 return getCharPtr() + fZero;
445 }
446
contentEquals(const FormattedStringBuilder & other) const447 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
448 if (fLength != other.fLength) {
449 return false;
450 }
451 for (int32_t i = 0; i < fLength; i++) {
452 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
453 return false;
454 }
455 }
456 return true;
457 }
458
containsField(Field field) const459 bool FormattedStringBuilder::containsField(Field field) const {
460 for (int32_t i = 0; i < fLength; i++) {
461 if (field == fieldAt(i)) {
462 return true;
463 }
464 }
465 return false;
466 }
467
468 U_NAMESPACE_END
469
470 #endif /* #if !UCONFIG_NO_FORMATTING */
471