• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "formatted_string_builder.h"
9 #include "putilimp.h"
10 #include "unicode/ustring.h"
11 #include "unicode/utf16.h"
12 #include "unicode/unum.h" // for UNumberFormatFields literals
13 
14 namespace {
15 
16 // A version of uprv_memcpy that checks for length 0.
17 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)18 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
19     if (len > 0) {
20         uprv_memcpy(dest, src, len);
21     }
22 }
23 
24 // A version of uprv_memmove that checks for length 0.
25 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)26 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
27     if (len > 0) {
28         uprv_memmove(dest, src, len);
29     }
30 }
31 
32 } // namespace
33 
34 
35 U_NAMESPACE_BEGIN
36 
FormattedStringBuilder()37 FormattedStringBuilder::FormattedStringBuilder() {
38 #if U_DEBUG
39     // Initializing the memory to non-zero helps catch some bugs that involve
40     // reading from an improperly terminated string.
41     for (int32_t i=0; i<getCapacity(); i++) {
42         getCharPtr()[i] = 1;
43     }
44 #endif
45 }
46 
~FormattedStringBuilder()47 FormattedStringBuilder::~FormattedStringBuilder() {
48     if (fUsingHeap) {
49         uprv_free(fChars.heap.ptr);
50         uprv_free(fFields.heap.ptr);
51     }
52 }
53 
// Copy constructor: all of the copying work is delegated to the copy
// assignment operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
57 
// Copy assignment. Releases any heap buffers this builder owns, then copies the
// full character and field buffers (the whole capacity, not just the used range)
// together with the zero offset and length from `other`.
//
// When `other` has a capacity above DEFAULT_CAPACITY, new heap buffers of that
// capacity are allocated. No UErrorCode is available here, so an allocation
// failure is handled silently by resetting this builder to a default-constructed
// state.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    if (&other == this) {
        // Self-assignment: nothing to copy, and freeing below would destroy the source.
        return *this;
    }

    // Give up whatever heap storage is currently owned.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t sourceCapacity = other.getCapacity();
    if (sourceCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *charBuffer =
                static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * sourceCapacity));
        Field *fieldBuffer =
                static_cast<Field *>(uprv_malloc(sizeof(Field) * sourceCapacity));
        const bool allocated = (charBuffer != nullptr) && (fieldBuffer != nullptr);
        if (!allocated) {
            // Free whichever allocation (if any) succeeded, then fall back to an
            // empty default builder; that nested assignment can't fail.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder();
            return *this;
        }

        fChars.heap.ptr = charBuffer;
        fChars.heap.capacity = sourceCapacity;
        fFields.heap.ptr = fieldBuffer;
        fFields.heap.capacity = sourceCapacity;
        fUsingHeap = true;
    }

    // Copy the complete buffers so that the zero offset stays valid as-is.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * sourceCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * sourceCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
99 
length() const100 int32_t FormattedStringBuilder::length() const {
101     return fLength;
102 }
103 
codePointCount() const104 int32_t FormattedStringBuilder::codePointCount() const {
105     return u_countChar32(getCharPtr() + fZero, fLength);
106 }
107 
getFirstCodePoint() const108 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
109     if (fLength == 0) {
110         return -1;
111     }
112     UChar32 cp;
113     U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
114     return cp;
115 }
116 
getLastCodePoint() const117 UChar32 FormattedStringBuilder::getLastCodePoint() const {
118     if (fLength == 0) {
119         return -1;
120     }
121     int32_t offset = fLength;
122     U16_BACK_1(getCharPtr() + fZero, 0, offset);
123     UChar32 cp;
124     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
125     return cp;
126 }
127 
codePointAt(int32_t index) const128 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
129     UChar32 cp;
130     U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
131     return cp;
132 }
133 
codePointBefore(int32_t index) const134 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
135     int32_t offset = index;
136     U16_BACK_1(getCharPtr() + fZero, 0, offset);
137     UChar32 cp;
138     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
139     return cp;
140 }
141 
clear()142 FormattedStringBuilder &FormattedStringBuilder::clear() {
143     // TODO: Reset the heap here?
144     fZero = getCapacity() / 2;
145     fLength = 0;
146     return *this;
147 }
148 
149 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)150 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
151     int32_t count = U16_LENGTH(codePoint);
152     int32_t position = prepareForInsert(index, count, status);
153     if (U_FAILURE(status)) {
154         return count;
155     }
156     if (count == 1) {
157         getCharPtr()[position] = (char16_t) codePoint;
158         getFieldPtr()[position] = field;
159     } else {
160         getCharPtr()[position] = U16_LEAD(codePoint);
161         getCharPtr()[position + 1] = U16_TRAIL(codePoint);
162         getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
163     }
164     return count;
165 }
166 
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)167 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
168                                     UErrorCode &status) {
169     if (unistr.length() == 0) {
170         // Nothing to insert.
171         return 0;
172     } else if (unistr.length() == 1) {
173         // Fast path: insert using insertCodePoint.
174         return insertCodePoint(index, unistr.charAt(0), field, status);
175     } else {
176         return insert(index, unistr, 0, unistr.length(), field, status);
177     }
178 }
179 
180 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)181 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
182                             Field field, UErrorCode &status) {
183     int32_t count = end - start;
184     int32_t position = prepareForInsert(index, count, status);
185     if (U_FAILURE(status)) {
186         return count;
187     }
188     for (int32_t i = 0; i < count; i++) {
189         getCharPtr()[position + i] = unistr.charAt(start + i);
190         getFieldPtr()[position + i] = field;
191     }
192     return count;
193 }
194 
195 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)196 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
197                             int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
198     int32_t thisLength = endThis - startThis;
199     int32_t otherLength = endOther - startOther;
200     int32_t count = otherLength - thisLength;
201     if (U_FAILURE(status)) {
202         return count;
203     }
204     int32_t position;
205     if (count > 0) {
206         // Overall, chars need to be added.
207         position = prepareForInsert(startThis, count, status);
208     } else {
209         // Overall, chars need to be removed or kept the same.
210         position = remove(startThis, -count);
211     }
212     if (U_FAILURE(status)) {
213         return count;
214     }
215     for (int32_t i = 0; i < otherLength; i++) {
216         getCharPtr()[position + i] = unistr.charAt(startOther + i);
217         getFieldPtr()[position + i] = field;
218     }
219     return count;
220 }
221 
append(const FormattedStringBuilder & other,UErrorCode & status)222 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
223     return insert(fLength, other, status);
224 }
225 
226 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)227 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
228     if (U_FAILURE(status)) {
229         return 0;
230     }
231     if (this == &other) {
232         status = U_ILLEGAL_ARGUMENT_ERROR;
233         return 0;
234     }
235     int32_t count = other.fLength;
236     if (count == 0) {
237         // Nothing to insert.
238         return 0;
239     }
240     int32_t position = prepareForInsert(index, count, status);
241     if (U_FAILURE(status)) {
242         return count;
243     }
244     for (int32_t i = 0; i < count; i++) {
245         getCharPtr()[position + i] = other.charAt(i);
246         getFieldPtr()[position + i] = other.fieldAt(i);
247     }
248     return count;
249 }
250 
writeTerminator(UErrorCode & status)251 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
252     int32_t position = prepareForInsert(fLength, 1, status);
253     if (U_FAILURE(status)) {
254         return;
255     }
256     getCharPtr()[position] = 0;
257     getFieldPtr()[position] = kUndefinedField;
258     fLength--;
259 }
260 
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)261 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
262     U_ASSERT(index >= 0);
263     U_ASSERT(index <= fLength);
264     U_ASSERT(count >= 0);
265     U_ASSERT(fZero >= 0);
266     U_ASSERT(fLength >= 0);
267     U_ASSERT(getCapacity() - fZero >= fLength);
268     if (U_FAILURE(status)) {
269         return count;
270     }
271     if (index == 0 && fZero - count >= 0) {
272         // Append to start
273         fZero -= count;
274         fLength += count;
275         return fZero;
276     } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
277         // Append to end
278         fLength += count;
279         return fZero + fLength - count;
280     } else {
281         // Move chars around and/or allocate more space
282         return prepareForInsertHelper(index, count, status);
283     }
284 }
285 
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)286 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
287     int32_t oldCapacity = getCapacity();
288     int32_t oldZero = fZero;
289     char16_t *oldChars = getCharPtr();
290     Field *oldFields = getFieldPtr();
291     int32_t newLength;
292     if (uprv_add32_overflow(fLength, count, &newLength)) {
293         status = U_INPUT_TOO_LONG_ERROR;
294         return -1;
295     }
296     int32_t newZero;
297     if (newLength > oldCapacity) {
298         if (newLength > INT32_MAX / 2) {
299             // We do not support more than 1G char16_t in this code because
300             // dealing with >2G *bytes* can cause subtle bugs.
301             status = U_INPUT_TOO_LONG_ERROR;
302             return -1;
303         }
304         // Keep newCapacity also to at most 1G char16_t.
305         int32_t newCapacity = newLength * 2;
306         newZero = (newCapacity - newLength) / 2;
307 
308         // C++ note: malloc appears in two places: here and in the assignment operator.
309         auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
310         auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
311         if (newChars == nullptr || newFields == nullptr) {
312             uprv_free(newChars);
313             uprv_free(newFields);
314             status = U_MEMORY_ALLOCATION_ERROR;
315             return -1;
316         }
317 
318         // First copy the prefix and then the suffix, leaving room for the new chars that the
319         // caller wants to insert.
320         // C++ note: memcpy is OK because the src and dest do not overlap.
321         uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
322         uprv_memcpy2(newChars + newZero + index + count,
323                 oldChars + oldZero + index,
324                 sizeof(char16_t) * (fLength - index));
325         uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
326         uprv_memcpy2(newFields + newZero + index + count,
327                 oldFields + oldZero + index,
328                 sizeof(Field) * (fLength - index));
329 
330         if (fUsingHeap) {
331             uprv_free(oldChars);
332             uprv_free(oldFields);
333         }
334         fUsingHeap = true;
335         fChars.heap.ptr = newChars;
336         fChars.heap.capacity = newCapacity;
337         fFields.heap.ptr = newFields;
338         fFields.heap.capacity = newCapacity;
339     } else {
340         newZero = (oldCapacity - newLength) / 2;
341 
342         // C++ note: memmove is required because src and dest may overlap.
343         // First copy the entire string to the location of the prefix, and then move the suffix
344         // to make room for the new chars that the caller wants to insert.
345         uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
346         uprv_memmove2(oldChars + newZero + index + count,
347                 oldChars + newZero + index,
348                 sizeof(char16_t) * (fLength - index));
349         uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
350         uprv_memmove2(oldFields + newZero + index + count,
351                 oldFields + newZero + index,
352                 sizeof(Field) * (fLength - index));
353     }
354     fZero = newZero;
355     fLength = newLength;
356     return fZero + index;
357 }
358 
remove(int32_t index,int32_t count)359 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
360      U_ASSERT(0 <= index);
361      U_ASSERT(index <= fLength);
362      U_ASSERT(count <= (fLength - index));
363      U_ASSERT(index <= getCapacity() - fZero);
364 
365     int32_t position = index + fZero;
366     // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
367     uprv_memmove2(getCharPtr() + position,
368             getCharPtr() + position + count,
369             sizeof(char16_t) * (fLength - index - count));
370     uprv_memmove2(getFieldPtr() + position,
371             getFieldPtr() + position + count,
372             sizeof(Field) * (fLength - index - count));
373     fLength -= count;
374     return position;
375 }
376 
toUnicodeString() const377 UnicodeString FormattedStringBuilder::toUnicodeString() const {
378     return UnicodeString(getCharPtr() + fZero, fLength);
379 }
380 
toTempUnicodeString() const381 const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
382     // Readonly-alias constructor:
383     return UnicodeString(false, getCharPtr() + fZero, fLength);
384 }
385 
toDebugString() const386 UnicodeString FormattedStringBuilder::toDebugString() const {
387     UnicodeString sb;
388     sb.append(u"<FormattedStringBuilder [", -1);
389     sb.append(toUnicodeString());
390     sb.append(u"] [", -1);
391     for (int i = 0; i < fLength; i++) {
392         if (fieldAt(i) == kUndefinedField) {
393             sb.append(u'n');
394         } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
395             char16_t c;
396             switch (fieldAt(i).getField()) {
397                 case UNUM_SIGN_FIELD:
398                     c = u'-';
399                     break;
400                 case UNUM_INTEGER_FIELD:
401                     c = u'i';
402                     break;
403                 case UNUM_FRACTION_FIELD:
404                     c = u'f';
405                     break;
406                 case UNUM_EXPONENT_FIELD:
407                     c = u'e';
408                     break;
409                 case UNUM_EXPONENT_SIGN_FIELD:
410                     c = u'+';
411                     break;
412                 case UNUM_EXPONENT_SYMBOL_FIELD:
413                     c = u'E';
414                     break;
415                 case UNUM_DECIMAL_SEPARATOR_FIELD:
416                     c = u'.';
417                     break;
418                 case UNUM_GROUPING_SEPARATOR_FIELD:
419                     c = u',';
420                     break;
421                 case UNUM_PERCENT_FIELD:
422                     c = u'%';
423                     break;
424                 case UNUM_PERMILL_FIELD:
425                     c = u'‰';
426                     break;
427                 case UNUM_CURRENCY_FIELD:
428                     c = u'$';
429                     break;
430                 default:
431                     c = u'0' + fieldAt(i).getField();
432                     break;
433             }
434             sb.append(c);
435         } else {
436             sb.append(u'0' + fieldAt(i).getCategory());
437         }
438     }
439     sb.append(u"]>", -1);
440     return sb;
441 }
442 
chars() const443 const char16_t *FormattedStringBuilder::chars() const {
444     return getCharPtr() + fZero;
445 }
446 
contentEquals(const FormattedStringBuilder & other) const447 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
448     if (fLength != other.fLength) {
449         return false;
450     }
451     for (int32_t i = 0; i < fLength; i++) {
452         if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
453             return false;
454         }
455     }
456     return true;
457 }
458 
containsField(Field field) const459 bool FormattedStringBuilder::containsField(Field field) const {
460     for (int32_t i = 0; i < fLength; i++) {
461         if (field == fieldAt(i)) {
462             return true;
463         }
464     }
465     return false;
466 }
467 
468 U_NAMESPACE_END
469 
470 #endif /* #if !UCONFIG_NO_FORMATTING */
471