1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "formatted_string_builder.h"
9 #include "unicode/ustring.h"
10 #include "unicode/utf16.h"
11 #include "unicode/unum.h" // for UNumberFormatFields literals
12
13 namespace {
14
15 // A version of uprv_memcpy that checks for length 0.
16 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)17 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
18 if (len > 0) {
19 uprv_memcpy(dest, src, len);
20 }
21 }
22
23 // A version of uprv_memmove that checks for length 0.
24 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)25 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
26 if (len > 0) {
27 uprv_memmove(dest, src, len);
28 }
29 }
30
31 } // namespace
32
33
34 U_NAMESPACE_BEGIN
35
FormattedStringBuilder()36 FormattedStringBuilder::FormattedStringBuilder() {
37 #if U_DEBUG
38 // Initializing the memory to non-zero helps catch some bugs that involve
39 // reading from an improperly terminated string.
40 for (int32_t i=0; i<getCapacity(); i++) {
41 getCharPtr()[i] = 1;
42 }
43 #endif
44 }
45
~FormattedStringBuilder()46 FormattedStringBuilder::~FormattedStringBuilder() {
47 if (fUsingHeap) {
48 uprv_free(fChars.heap.ptr);
49 uprv_free(fFields.heap.ptr);
50 }
51 }
52
// Copy constructor. All of the copying work is delegated to the copy-assignment operator.
// NOTE(review): operator= reads fUsingHeap before writing it, so this presumes the member
// has an in-class initializer in the header -- confirm.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
56
// Copy assignment. Replaces the contents of this builder with a copy of `other`.
//
// Any heap buffers currently owned are released first. If `other` has a capacity larger
// than DEFAULT_CAPACITY, fresh heap buffers of that capacity are allocated; otherwise the
// non-heap storage is used. The full capacity of both the char and field arrays is then
// copied, along with fZero and fLength.
//
// No UErrorCode is available here, so an allocation failure is handled silently by
// resetting this object to a default-constructed builder.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Self-assignment: nothing to do, and we must not free buffers we would then read.
    if (&other == this) {
        return *this;
    }

    // Give up the current heap buffers, if any. fUsingHeap is cleared so that the object
    // does not keep claiming ownership of the freed pointers.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t srcCapacity = other.getCapacity();
    if (srcCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *charBuffer = static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * srcCapacity));
        Field *fieldBuffer = static_cast<Field *>(uprv_malloc(sizeof(Field) * srcCapacity));
        const bool allocated = (charBuffer != nullptr) && (fieldBuffer != nullptr);
        if (!allocated) {
            // UErrorCode is not available; fail silently.
            // Either allocation may have succeeded, so release both before resetting.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        fChars.heap.ptr = charBuffer;
        fChars.heap.capacity = srcCapacity;
        fFields.heap.ptr = fieldBuffer;
        fFields.heap.capacity = srcCapacity;
        fUsingHeap = true;
    }

    // Copy the entire buffers (not just the used window) so that fZero keeps its meaning.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * srcCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * srcCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
98
length() const99 int32_t FormattedStringBuilder::length() const {
100 return fLength;
101 }
102
codePointCount() const103 int32_t FormattedStringBuilder::codePointCount() const {
104 return u_countChar32(getCharPtr() + fZero, fLength);
105 }
106
getFirstCodePoint() const107 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
108 if (fLength == 0) {
109 return -1;
110 }
111 UChar32 cp;
112 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
113 return cp;
114 }
115
getLastCodePoint() const116 UChar32 FormattedStringBuilder::getLastCodePoint() const {
117 if (fLength == 0) {
118 return -1;
119 }
120 int32_t offset = fLength;
121 U16_BACK_1(getCharPtr() + fZero, 0, offset);
122 UChar32 cp;
123 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
124 return cp;
125 }
126
codePointAt(int32_t index) const127 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
128 UChar32 cp;
129 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
130 return cp;
131 }
132
codePointBefore(int32_t index) const133 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
134 int32_t offset = index;
135 U16_BACK_1(getCharPtr() + fZero, 0, offset);
136 UChar32 cp;
137 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
138 return cp;
139 }
140
clear()141 FormattedStringBuilder &FormattedStringBuilder::clear() {
142 // TODO: Reset the heap here?
143 fZero = getCapacity() / 2;
144 fLength = 0;
145 return *this;
146 }
147
148 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)149 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
150 int32_t count = U16_LENGTH(codePoint);
151 int32_t position = prepareForInsert(index, count, status);
152 if (U_FAILURE(status)) {
153 return count;
154 }
155 if (count == 1) {
156 getCharPtr()[position] = (char16_t) codePoint;
157 getFieldPtr()[position] = field;
158 } else {
159 getCharPtr()[position] = U16_LEAD(codePoint);
160 getCharPtr()[position + 1] = U16_TRAIL(codePoint);
161 getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
162 }
163 return count;
164 }
165
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)166 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
167 UErrorCode &status) {
168 if (unistr.length() == 0) {
169 // Nothing to insert.
170 return 0;
171 } else if (unistr.length() == 1) {
172 // Fast path: insert using insertCodePoint.
173 return insertCodePoint(index, unistr.charAt(0), field, status);
174 } else {
175 return insert(index, unistr, 0, unistr.length(), field, status);
176 }
177 }
178
179 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)180 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
181 Field field, UErrorCode &status) {
182 int32_t count = end - start;
183 int32_t position = prepareForInsert(index, count, status);
184 if (U_FAILURE(status)) {
185 return count;
186 }
187 for (int32_t i = 0; i < count; i++) {
188 getCharPtr()[position + i] = unistr.charAt(start + i);
189 getFieldPtr()[position + i] = field;
190 }
191 return count;
192 }
193
194 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)195 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
196 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
197 int32_t thisLength = endThis - startThis;
198 int32_t otherLength = endOther - startOther;
199 int32_t count = otherLength - thisLength;
200 int32_t position;
201 if (count > 0) {
202 // Overall, chars need to be added.
203 position = prepareForInsert(startThis, count, status);
204 } else {
205 // Overall, chars need to be removed or kept the same.
206 position = remove(startThis, -count);
207 }
208 if (U_FAILURE(status)) {
209 return count;
210 }
211 for (int32_t i = 0; i < otherLength; i++) {
212 getCharPtr()[position + i] = unistr.charAt(startOther + i);
213 getFieldPtr()[position + i] = field;
214 }
215 return count;
216 }
217
append(const FormattedStringBuilder & other,UErrorCode & status)218 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
219 return insert(fLength, other, status);
220 }
221
222 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)223 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
224 if (this == &other) {
225 status = U_ILLEGAL_ARGUMENT_ERROR;
226 return 0;
227 }
228 int32_t count = other.fLength;
229 if (count == 0) {
230 // Nothing to insert.
231 return 0;
232 }
233 int32_t position = prepareForInsert(index, count, status);
234 if (U_FAILURE(status)) {
235 return count;
236 }
237 for (int32_t i = 0; i < count; i++) {
238 getCharPtr()[position + i] = other.charAt(i);
239 getFieldPtr()[position + i] = other.fieldAt(i);
240 }
241 return count;
242 }
243
writeTerminator(UErrorCode & status)244 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
245 int32_t position = prepareForInsert(fLength, 1, status);
246 if (U_FAILURE(status)) {
247 return;
248 }
249 getCharPtr()[position] = 0;
250 getFieldPtr()[position] = kUndefinedField;
251 fLength--;
252 }
253
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)254 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
255 U_ASSERT(index >= 0);
256 U_ASSERT(index <= fLength);
257 U_ASSERT(count >= 0);
258 if (index == 0 && fZero - count >= 0) {
259 // Append to start
260 fZero -= count;
261 fLength += count;
262 return fZero;
263 } else if (index == fLength && fZero + fLength + count < getCapacity()) {
264 // Append to end
265 fLength += count;
266 return fZero + fLength - count;
267 } else {
268 // Move chars around and/or allocate more space
269 return prepareForInsertHelper(index, count, status);
270 }
271 }
272
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)273 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
274 int32_t oldCapacity = getCapacity();
275 int32_t oldZero = fZero;
276 char16_t *oldChars = getCharPtr();
277 Field *oldFields = getFieldPtr();
278 if (fLength + count > oldCapacity) {
279 if ((fLength + count) > INT32_MAX / 2) {
280 // If we continue, then newCapacity will overflow int32_t in the next line.
281 status = U_INPUT_TOO_LONG_ERROR;
282 return -1;
283 }
284 int32_t newCapacity = (fLength + count) * 2;
285 int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
286
287 // C++ note: malloc appears in two places: here and in the assignment operator.
288 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
289 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
290 if (newChars == nullptr || newFields == nullptr) {
291 uprv_free(newChars);
292 uprv_free(newFields);
293 status = U_MEMORY_ALLOCATION_ERROR;
294 return -1;
295 }
296
297 // First copy the prefix and then the suffix, leaving room for the new chars that the
298 // caller wants to insert.
299 // C++ note: memcpy is OK because the src and dest do not overlap.
300 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
301 uprv_memcpy2(newChars + newZero + index + count,
302 oldChars + oldZero + index,
303 sizeof(char16_t) * (fLength - index));
304 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
305 uprv_memcpy2(newFields + newZero + index + count,
306 oldFields + oldZero + index,
307 sizeof(Field) * (fLength - index));
308
309 if (fUsingHeap) {
310 uprv_free(oldChars);
311 uprv_free(oldFields);
312 }
313 fUsingHeap = true;
314 fChars.heap.ptr = newChars;
315 fChars.heap.capacity = newCapacity;
316 fFields.heap.ptr = newFields;
317 fFields.heap.capacity = newCapacity;
318 fZero = newZero;
319 fLength += count;
320 } else {
321 int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
322
323 // C++ note: memmove is required because src and dest may overlap.
324 // First copy the entire string to the location of the prefix, and then move the suffix
325 // to make room for the new chars that the caller wants to insert.
326 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
327 uprv_memmove2(oldChars + newZero + index + count,
328 oldChars + newZero + index,
329 sizeof(char16_t) * (fLength - index));
330 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
331 uprv_memmove2(oldFields + newZero + index + count,
332 oldFields + newZero + index,
333 sizeof(Field) * (fLength - index));
334
335 fZero = newZero;
336 fLength += count;
337 }
338 U_ASSERT((fZero + index) >= 0);
339 return fZero + index;
340 }
341
remove(int32_t index,int32_t count)342 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
343 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
344 int32_t position = index + fZero;
345 U_ASSERT(position >= 0);
346 uprv_memmove2(getCharPtr() + position,
347 getCharPtr() + position + count,
348 sizeof(char16_t) * (fLength - index - count));
349 uprv_memmove2(getFieldPtr() + position,
350 getFieldPtr() + position + count,
351 sizeof(Field) * (fLength - index - count));
352 fLength -= count;
353 return position;
354 }
355
toUnicodeString() const356 UnicodeString FormattedStringBuilder::toUnicodeString() const {
357 return UnicodeString(getCharPtr() + fZero, fLength);
358 }
359
toTempUnicodeString() const360 const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
361 // Readonly-alias constructor:
362 return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
363 }
364
toDebugString() const365 UnicodeString FormattedStringBuilder::toDebugString() const {
366 UnicodeString sb;
367 sb.append(u"<FormattedStringBuilder [", -1);
368 sb.append(toUnicodeString());
369 sb.append(u"] [", -1);
370 for (int i = 0; i < fLength; i++) {
371 if (fieldAt(i) == kUndefinedField) {
372 sb.append(u'n');
373 } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
374 char16_t c;
375 switch (fieldAt(i).getField()) {
376 case UNUM_SIGN_FIELD:
377 c = u'-';
378 break;
379 case UNUM_INTEGER_FIELD:
380 c = u'i';
381 break;
382 case UNUM_FRACTION_FIELD:
383 c = u'f';
384 break;
385 case UNUM_EXPONENT_FIELD:
386 c = u'e';
387 break;
388 case UNUM_EXPONENT_SIGN_FIELD:
389 c = u'+';
390 break;
391 case UNUM_EXPONENT_SYMBOL_FIELD:
392 c = u'E';
393 break;
394 case UNUM_DECIMAL_SEPARATOR_FIELD:
395 c = u'.';
396 break;
397 case UNUM_GROUPING_SEPARATOR_FIELD:
398 c = u',';
399 break;
400 case UNUM_PERCENT_FIELD:
401 c = u'%';
402 break;
403 case UNUM_PERMILL_FIELD:
404 c = u'‰';
405 break;
406 case UNUM_CURRENCY_FIELD:
407 c = u'$';
408 break;
409 default:
410 c = u'0' + fieldAt(i).getField();
411 break;
412 }
413 sb.append(c);
414 } else {
415 sb.append(u'0' + fieldAt(i).getCategory());
416 }
417 }
418 sb.append(u"]>", -1);
419 return sb;
420 }
421
chars() const422 const char16_t *FormattedStringBuilder::chars() const {
423 return getCharPtr() + fZero;
424 }
425
contentEquals(const FormattedStringBuilder & other) const426 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
427 if (fLength != other.fLength) {
428 return false;
429 }
430 for (int32_t i = 0; i < fLength; i++) {
431 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
432 return false;
433 }
434 }
435 return true;
436 }
437
containsField(Field field) const438 bool FormattedStringBuilder::containsField(Field field) const {
439 for (int32_t i = 0; i < fLength; i++) {
440 if (field == fieldAt(i)) {
441 return true;
442 }
443 }
444 return false;
445 }
446
447 U_NAMESPACE_END
448
449 #endif /* #if !UCONFIG_NO_FORMATTING */
450