• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "formatted_string_builder.h"
9 #include "putilimp.h"
10 #include "unicode/ustring.h"
11 #include "unicode/utf16.h"
12 #include "unicode/unum.h" // for UNumberFormatFields literals
13 
14 namespace {
15 
16 // A version of uprv_memcpy that checks for length 0.
17 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
19     if (len > 0) {
20         uprv_memcpy(dest, src, len);
21     }
22 }
23 
24 // A version of uprv_memmove that checks for length 0.
25 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)26 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
27     if (len > 0) {
28         uprv_memmove(dest, src, len);
29     }
30 }
31 
32 } // namespace
33 
34 
35 U_NAMESPACE_BEGIN
36 
FormattedStringBuilder()37 FormattedStringBuilder::FormattedStringBuilder() {
38 #if U_DEBUG
39     // Initializing the memory to non-zero helps catch some bugs that involve
40     // reading from an improperly terminated string.
41     for (int32_t i=0; i<getCapacity(); i++) {
42         getCharPtr()[i] = 1;
43     }
44 #endif
45 }
46 
~FormattedStringBuilder()47 FormattedStringBuilder::~FormattedStringBuilder() {
48     if (fUsingHeap) {
49         uprv_free(fChars.heap.ptr);
50         uprv_free(fFields.heap.ptr);
51     }
52 }
53 
// Copy constructor: delegates all of the work to the copy-assignment operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
57 
// Copy assignment. Releases any heap buffers this object holds, allocates new heap buffers
// when the source's capacity exceeds DEFAULT_CAPACITY, then copies the source's full
// char/field storage together with its zero offset and length.
// If allocation fails, this object is reset to a default-constructed state (no error code
// is available to report the failure).
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Assigning an object to itself is a no-op.
    if (&other == this) {
        return *this;
    }

    // Drop whatever heap storage is currently held before copying.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t srcCapacity = other.getCapacity();
    if (srcCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        auto* heapChars = static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * srcCapacity));
        auto* heapFields = static_cast<Field*>(uprv_malloc(sizeof(Field) * srcCapacity));
        if (heapChars == nullptr || heapFields == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(heapChars);
            uprv_free(heapFields);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        fUsingHeap = true;
        fChars.heap.ptr = heapChars;
        fChars.heap.capacity = srcCapacity;
        fFields.heap.ptr = heapFields;
        fFields.heap.capacity = srcCapacity;
    }

    // Copy the entire storage (not only the used range) from the source.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * srcCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * srcCapacity);

    fZero = other.fZero;
    fLength = other.fLength;
    return *this;
}
99 
length() const100 int32_t FormattedStringBuilder::length() const {
101     return fLength;
102 }
103 
codePointCount() const104 int32_t FormattedStringBuilder::codePointCount() const {
105     return u_countChar32(getCharPtr() + fZero, fLength);
106 }
107 
getFirstCodePoint() const108 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
109     if (fLength == 0) {
110         return -1;
111     }
112     UChar32 cp;
113     U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
114     return cp;
115 }
116 
getLastCodePoint() const117 UChar32 FormattedStringBuilder::getLastCodePoint() const {
118     if (fLength == 0) {
119         return -1;
120     }
121     int32_t offset = fLength;
122     U16_BACK_1(getCharPtr() + fZero, 0, offset);
123     UChar32 cp;
124     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
125     return cp;
126 }
127 
codePointAt(int32_t index) const128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
129     UChar32 cp;
130     U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
131     return cp;
132 }
133 
codePointBefore(int32_t index) const134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
135     int32_t offset = index;
136     U16_BACK_1(getCharPtr() + fZero, 0, offset);
137     UChar32 cp;
138     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
139     return cp;
140 }
141 
clear()142 FormattedStringBuilder &FormattedStringBuilder::clear() {
143     // TODO: Reset the heap here?
144     fZero = getCapacity() / 2;
145     fLength = 0;
146     return *this;
147 }
148 
149 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
151     int32_t count = U16_LENGTH(codePoint);
152     int32_t position = prepareForInsert(index, count, status);
153     if (U_FAILURE(status)) {
154         return count;
155     }
156     auto* charPtr = getCharPtr();
157     auto* fieldPtr = getFieldPtr();
158     if (count == 1) {
159         charPtr[position] = static_cast<char16_t>(codePoint);
160         fieldPtr[position] = field;
161     } else {
162         charPtr[position] = U16_LEAD(codePoint);
163         charPtr[position + 1] = U16_TRAIL(codePoint);
164         fieldPtr[position] = fieldPtr[position + 1] = field;
165     }
166     return count;
167 }
168 
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)169 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
170                                     UErrorCode &status) {
171     if (unistr.length() == 0) {
172         // Nothing to insert.
173         return 0;
174     } else if (unistr.length() == 1) {
175         // Fast path: insert using insertCodePoint.
176         return insertCodePoint(index, unistr.charAt(0), field, status);
177     } else {
178         return insert(index, unistr, 0, unistr.length(), field, status);
179     }
180 }
181 
182 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)183 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
184                             Field field, UErrorCode &status) {
185     int32_t count = end - start;
186     int32_t position = prepareForInsert(index, count, status);
187     if (U_FAILURE(status)) {
188         return count;
189     }
190     for (int32_t i = 0; i < count; i++) {
191         getCharPtr()[position + i] = unistr.charAt(start + i);
192         getFieldPtr()[position + i] = field;
193     }
194     return count;
195 }
196 
197 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)198 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
199                             int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
200     int32_t thisLength = endThis - startThis;
201     int32_t otherLength = endOther - startOther;
202     int32_t count = otherLength - thisLength;
203     if (U_FAILURE(status)) {
204         return count;
205     }
206     int32_t position;
207     if (count > 0) {
208         // Overall, chars need to be added.
209         position = prepareForInsert(startThis, count, status);
210     } else {
211         // Overall, chars need to be removed or kept the same.
212         position = remove(startThis, -count);
213     }
214     if (U_FAILURE(status)) {
215         return count;
216     }
217     for (int32_t i = 0; i < otherLength; i++) {
218         getCharPtr()[position + i] = unistr.charAt(startOther + i);
219         getFieldPtr()[position + i] = field;
220     }
221     return count;
222 }
223 
append(const FormattedStringBuilder & other,UErrorCode & status)224 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
225     return insert(fLength, other, status);
226 }
227 
228 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)229 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
230     if (U_FAILURE(status)) {
231         return 0;
232     }
233     if (this == &other) {
234         status = U_ILLEGAL_ARGUMENT_ERROR;
235         return 0;
236     }
237     int32_t count = other.fLength;
238     if (count == 0) {
239         // Nothing to insert.
240         return 0;
241     }
242     int32_t position = prepareForInsert(index, count, status);
243     if (U_FAILURE(status)) {
244         return count;
245     }
246     for (int32_t i = 0; i < count; i++) {
247         getCharPtr()[position + i] = other.charAt(i);
248         getFieldPtr()[position + i] = other.fieldAt(i);
249     }
250     return count;
251 }
252 
writeTerminator(UErrorCode & status)253 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
254     int32_t position = prepareForInsert(fLength, 1, status);
255     if (U_FAILURE(status)) {
256         return;
257     }
258     getCharPtr()[position] = 0;
259     getFieldPtr()[position] = kUndefinedField;
260     fLength--;
261 }
262 
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)263 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
264     U_ASSERT(index >= 0);
265     U_ASSERT(index <= fLength);
266     U_ASSERT(count >= 0);
267     U_ASSERT(fZero >= 0);
268     U_ASSERT(fLength >= 0);
269     U_ASSERT(getCapacity() - fZero >= fLength);
270     if (U_FAILURE(status)) {
271         return count;
272     }
273     if (index == 0 && fZero - count >= 0) {
274         // Append to start
275         fZero -= count;
276         fLength += count;
277         return fZero;
278     } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
279         // Append to end
280         fLength += count;
281         return fZero + fLength - count;
282     } else {
283         // Move chars around and/or allocate more space
284         return prepareForInsertHelper(index, count, status);
285     }
286 }
287 
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)288 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
289     int32_t oldCapacity = getCapacity();
290     int32_t oldZero = fZero;
291     char16_t *oldChars = getCharPtr();
292     Field *oldFields = getFieldPtr();
293     int32_t newLength;
294     if (uprv_add32_overflow(fLength, count, &newLength)) {
295         status = U_INPUT_TOO_LONG_ERROR;
296         return -1;
297     }
298     int32_t newZero;
299     if (newLength > oldCapacity) {
300         if (newLength > INT32_MAX / 2) {
301             // We do not support more than 1G char16_t in this code because
302             // dealing with >2G *bytes* can cause subtle bugs.
303             status = U_INPUT_TOO_LONG_ERROR;
304             return -1;
305         }
306         // Keep newCapacity also to at most 1G char16_t.
307         int32_t newCapacity = newLength * 2;
308         newZero = (newCapacity - newLength) / 2;
309 
310         // C++ note: malloc appears in two places: here and in the assignment operator.
311         auto* newChars =
312             static_cast<char16_t*>(uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
313         auto* newFields =
314             static_cast<Field*>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
315         if (newChars == nullptr || newFields == nullptr) {
316             uprv_free(newChars);
317             uprv_free(newFields);
318             status = U_MEMORY_ALLOCATION_ERROR;
319             return -1;
320         }
321 
322         // First copy the prefix and then the suffix, leaving room for the new chars that the
323         // caller wants to insert.
324         // C++ note: memcpy is OK because the src and dest do not overlap.
325         uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
326         uprv_memcpy2(newChars + newZero + index + count,
327                 oldChars + oldZero + index,
328                 sizeof(char16_t) * (fLength - index));
329         uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
330         uprv_memcpy2(newFields + newZero + index + count,
331                 oldFields + oldZero + index,
332                 sizeof(Field) * (fLength - index));
333 
334         if (fUsingHeap) {
335             uprv_free(oldChars);
336             uprv_free(oldFields);
337         }
338         fUsingHeap = true;
339         fChars.heap.ptr = newChars;
340         fChars.heap.capacity = newCapacity;
341         fFields.heap.ptr = newFields;
342         fFields.heap.capacity = newCapacity;
343     } else {
344         newZero = (oldCapacity - newLength) / 2;
345 
346         // C++ note: memmove is required because src and dest may overlap.
347         // First copy the entire string to the location of the prefix, and then move the suffix
348         // to make room for the new chars that the caller wants to insert.
349         uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
350         uprv_memmove2(oldChars + newZero + index + count,
351                 oldChars + newZero + index,
352                 sizeof(char16_t) * (fLength - index));
353         uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
354         uprv_memmove2(oldFields + newZero + index + count,
355                 oldFields + newZero + index,
356                 sizeof(Field) * (fLength - index));
357     }
358     fZero = newZero;
359     fLength = newLength;
360     return fZero + index;
361 }
362 
remove(int32_t index,int32_t count)363 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
364      U_ASSERT(0 <= index);
365      U_ASSERT(index <= fLength);
366      U_ASSERT(count <= (fLength - index));
367      U_ASSERT(index <= getCapacity() - fZero);
368 
369     int32_t position = index + fZero;
370     // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
371     uprv_memmove2(getCharPtr() + position,
372             getCharPtr() + position + count,
373             sizeof(char16_t) * (fLength - index - count));
374     uprv_memmove2(getFieldPtr() + position,
375             getFieldPtr() + position + count,
376             sizeof(Field) * (fLength - index - count));
377     fLength -= count;
378     return position;
379 }
380 
toUnicodeString() const381 UnicodeString FormattedStringBuilder::toUnicodeString() const {
382     return UnicodeString(getCharPtr() + fZero, fLength);
383 }
384 
toTempUnicodeString() const385 UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
386     // Readonly-alias constructor:
387     return UnicodeString(false, getCharPtr() + fZero, fLength);
388 }
389 
toDebugString() const390 UnicodeString FormattedStringBuilder::toDebugString() const {
391     UnicodeString sb;
392     sb.append(u"<FormattedStringBuilder [", -1);
393     sb.append(toUnicodeString());
394     sb.append(u"] [", -1);
395     for (int i = 0; i < fLength; i++) {
396         if (fieldAt(i) == kUndefinedField) {
397             sb.append(u'n');
398         } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
399             char16_t c;
400             switch (fieldAt(i).getField()) {
401                 case UNUM_SIGN_FIELD:
402                     c = u'-';
403                     break;
404                 case UNUM_INTEGER_FIELD:
405                     c = u'i';
406                     break;
407                 case UNUM_FRACTION_FIELD:
408                     c = u'f';
409                     break;
410                 case UNUM_EXPONENT_FIELD:
411                     c = u'e';
412                     break;
413                 case UNUM_EXPONENT_SIGN_FIELD:
414                     c = u'+';
415                     break;
416                 case UNUM_EXPONENT_SYMBOL_FIELD:
417                     c = u'E';
418                     break;
419                 case UNUM_DECIMAL_SEPARATOR_FIELD:
420                     c = u'.';
421                     break;
422                 case UNUM_GROUPING_SEPARATOR_FIELD:
423                     c = u',';
424                     break;
425                 case UNUM_PERCENT_FIELD:
426                     c = u'%';
427                     break;
428                 case UNUM_PERMILL_FIELD:
429                     c = u'‰';
430                     break;
431                 case UNUM_CURRENCY_FIELD:
432                     c = u'$';
433                     break;
434                 default:
435                     c = u'0' + fieldAt(i).getField();
436                     break;
437             }
438             sb.append(c);
439         } else {
440             sb.append(u'0' + fieldAt(i).getCategory());
441         }
442     }
443     sb.append(u"]>", -1);
444     return sb;
445 }
446 
chars() const447 const char16_t *FormattedStringBuilder::chars() const {
448     return getCharPtr() + fZero;
449 }
450 
contentEquals(const FormattedStringBuilder & other) const451 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
452     if (fLength != other.fLength) {
453         return false;
454     }
455     for (int32_t i = 0; i < fLength; i++) {
456         if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
457             return false;
458         }
459     }
460     return true;
461 }
462 
containsField(Field field) const463 bool FormattedStringBuilder::containsField(Field field) const {
464     for (int32_t i = 0; i < fLength; i++) {
465         if (field == fieldAt(i)) {
466             return true;
467         }
468     }
469     return false;
470 }
471 
472 U_NAMESPACE_END
473 
474 #endif /* #if !UCONFIG_NO_FORMATTING */
475