// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __NUMBER_STRINGBUILDER_H__
8 #define __NUMBER_STRINGBUILDER_H__
9
10
11 #include <cstdint>
12 #include <type_traits>
13
14 #include "cstring.h"
15 #include "uassert.h"
16 #include "fphdlimp.h"
17
18 U_NAMESPACE_BEGIN
19
20 class FormattedValueStringBuilderImpl;
21
22 /**
23 * A StringBuilder optimized for formatting. It implements the following key
24 * features beyond a UnicodeString:
25 *
26 * <ol>
27 * <li>Efficient prepend as well as append.
28 * <li>Keeps track of Fields in an efficient manner.
29 * </ol>
30 *
31 * See also FormattedValueStringBuilderImpl.
32 *
33 * @author sffc (Shane Carr)
34 */
35 class U_I18N_API FormattedStringBuilder : public UMemory {
36 private:
37 static const int32_t DEFAULT_CAPACITY = 40;
38
39 template<typename T>
40 union ValueOrHeapArray {
41 T value[DEFAULT_CAPACITY];
42 struct {
43 T *ptr;
44 int32_t capacity;
45 } heap;
46 };
47
48 public:
49 FormattedStringBuilder();
50
51 ~FormattedStringBuilder();
52
53 FormattedStringBuilder(const FormattedStringBuilder &other);
54
55 // Convention: bottom 4 bits for field, top 4 bits for field category.
56 // Field category 0 implies the number category so that the number field
57 // literals can be directly passed as a Field type.
58 // Exported as U_I18N_API so it can be used by other exports on Windows.
59 struct U_I18N_API Field {
60 uint8_t bits;
61
62 Field() = default;
63 constexpr Field(uint8_t category, uint8_t field);
64
65 inline UFieldCategory getCategory() const;
66 inline int32_t getField() const;
67 inline bool isNumeric() const;
68 inline bool isUndefined() const;
69 inline bool operator==(const Field& other) const;
70 inline bool operator!=(const Field& other) const;
71 };
72
73 FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
74
75 int32_t length() const;
76
77 int32_t codePointCount() const;
78
charAt(int32_t index)79 inline char16_t charAt(int32_t index) const {
80 U_ASSERT(index >= 0);
81 U_ASSERT(index < fLength);
82 return getCharPtr()[fZero + index];
83 }
84
fieldAt(int32_t index)85 inline Field fieldAt(int32_t index) const {
86 U_ASSERT(index >= 0);
87 U_ASSERT(index < fLength);
88 return getFieldPtr()[fZero + index];
89 }
90
91 UChar32 getFirstCodePoint() const;
92
93 UChar32 getLastCodePoint() const;
94
95 UChar32 codePointAt(int32_t index) const;
96
97 UChar32 codePointBefore(int32_t index) const;
98
99 FormattedStringBuilder &clear();
100
101 /** Appends a UTF-16 code unit. */
appendChar16(char16_t codeUnit,Field field,UErrorCode & status)102 inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
103 // appendCodePoint handles both code units and code points.
104 return insertCodePoint(fLength, codeUnit, field, status);
105 }
106
107 /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
insertChar16(int32_t index,char16_t codeUnit,Field field,UErrorCode & status)108 inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
109 // insertCodePoint handles both code units and code points.
110 return insertCodePoint(index, codeUnit, field, status);
111 }
112
113 /** Appends a Unicode code point. */
appendCodePoint(UChar32 codePoint,Field field,UErrorCode & status)114 inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
115 return insertCodePoint(fLength, codePoint, field, status);
116 }
117
118 /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
119 int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
120
121 /** Appends a string. */
append(const UnicodeString & unistr,Field field,UErrorCode & status)122 inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
123 return insert(fLength, unistr, field, status);
124 }
125
126 /** Inserts a string. Note: insert at index 0 is very efficient. */
127 int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
128
129 /** Inserts a substring. Note: insert at index 0 is very efficient.
130 *
131 * @param start Start index of the substring of unistr to be inserted.
132 * @param end End index of the substring of unistr to be inserted (exclusive).
133 */
134 int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
135 UErrorCode &status);
136
137 /** Deletes a substring and then inserts a string at that same position.
138 * Similar to JavaScript Array.prototype.splice().
139 *
140 * @param startThis Start of the span to delete.
141 * @param endThis End of the span to delete (exclusive).
142 * @param unistr The string to insert at the deletion position.
143 * @param startOther Start index of the substring of unistr to be inserted.
144 * @param endOther End index of the substring of unistr to be inserted (exclusive).
145 */
146 int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
147 int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
148
149 /** Appends a formatted string. */
150 int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
151
152 /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
153 int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
154
155 /**
156 * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
157 * not count toward the string length. Any further changes to the string (insert or
158 * append) may invalidate the NUL terminator.
159 *
160 * You should call this method after the formatted string is completely built if you
161 * plan to return a pointer to the string from a C API.
162 */
163 void writeTerminator(UErrorCode& status);
164
165 /**
166 * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
167 */
168 UnicodeString toUnicodeString() const;
169
170 /**
171 * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
172 * unchanged. Slightly faster than toUnicodeString().
173 */
174 const UnicodeString toTempUnicodeString() const;
175
176 UnicodeString toDebugString() const;
177
178 const char16_t *chars() const;
179
180 bool contentEquals(const FormattedStringBuilder &other) const;
181
182 bool containsField(Field field) const;
183
184 private:
185 bool fUsingHeap = false;
186 ValueOrHeapArray<char16_t> fChars;
187 ValueOrHeapArray<Field> fFields;
188 int32_t fZero = DEFAULT_CAPACITY / 2;
189 int32_t fLength = 0;
190
getCharPtr()191 inline char16_t *getCharPtr() {
192 return fUsingHeap ? fChars.heap.ptr : fChars.value;
193 }
194
getCharPtr()195 inline const char16_t *getCharPtr() const {
196 return fUsingHeap ? fChars.heap.ptr : fChars.value;
197 }
198
getFieldPtr()199 inline Field *getFieldPtr() {
200 return fUsingHeap ? fFields.heap.ptr : fFields.value;
201 }
202
getFieldPtr()203 inline const Field *getFieldPtr() const {
204 return fUsingHeap ? fFields.heap.ptr : fFields.value;
205 }
206
getCapacity()207 inline int32_t getCapacity() const {
208 return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
209 }
210
211 int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
212
213 int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
214
215 int32_t remove(int32_t index, int32_t count);
216
217 friend class FormattedValueStringBuilderImpl;
218 };
219
220 static_assert(
221 // std::is_pod<> is deprecated.
222 std::is_standard_layout<FormattedStringBuilder::Field>::value &&
223 std::is_trivial<FormattedStringBuilder::Field>::value,
224 "Field should be a POD type for efficient initialization");
225
Field(uint8_t category,uint8_t field)226 constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field)
227 : bits((
228 U_ASSERT(category <= 0xf),
229 U_ASSERT(field <= 0xf),
230 static_cast<uint8_t>((category << 4) | field)
231 )) {}
232
233 /**
234 * Internal constant for the undefined field for use in FormattedStringBuilder.
235 */
236 constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0};
237
238 /**
239 * Internal field to signal "numeric" when fields are not supported in NumberFormat.
240 */
241 constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1};
242
getCategory()243 inline UFieldCategory FormattedStringBuilder::Field::getCategory() const {
244 return static_cast<UFieldCategory>(bits >> 4);
245 }
246
getField()247 inline int32_t FormattedStringBuilder::Field::getField() const {
248 return bits & 0xf;
249 }
250
isNumeric()251 inline bool FormattedStringBuilder::Field::isNumeric() const {
252 return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField;
253 }
254
isUndefined()255 inline bool FormattedStringBuilder::Field::isUndefined() const {
256 return getCategory() == UFIELD_CATEGORY_UNDEFINED;
257 }
258
259 inline bool FormattedStringBuilder::Field::operator==(const Field& other) const {
260 return bits == other.bits;
261 }
262
263 inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const {
264 return bits != other.bits;
265 }
266
267 U_NAMESPACE_END
268
269
270 #endif //__NUMBER_STRINGBUILDER_H__
271
272 #endif /* #if !UCONFIG_NO_FORMATTING */
273