1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1998-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 11/11/98 stephen Changed per 11/9 code review.
16 * 04/20/99 stephen Overhauled per 4/16 code review.
17 * 11/18/99 aliu Made to inherit from Replaceable. Added method
18 * handleReplaceBetween(); other methods unchanged.
19 * 06/25/01 grhoten Remove dependency on iostream.
20 ******************************************************************************
21 */
22
23 #ifndef UNISTR_H
24 #define UNISTR_H
25
26 /**
27 * \file
28 * \brief C++ API: Unicode String
29 */
30
31 #include "unicode/utypes.h"
32
33 #if U_SHOW_CPLUSPLUS_API
34
35 #include <cstddef>
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/rep.h"
39 #include "unicode/std_string.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/bytestream.h"
42
43 struct UConverter; // unicode/ucnv.h
44
45 #ifndef USTRING_H
46 /**
47 * \ingroup ustring_ustrlen
48 * @param s Pointer to sequence of UChars.
49 * @return Length of sequence.
50 */
51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52 #endif
53
54 U_NAMESPACE_BEGIN
55
56 #if !UCONFIG_NO_BREAK_ITERATION
57 class BreakIterator; // unicode/brkiter.h
58 #endif
59 class Edits;
60
61 U_NAMESPACE_END
62
63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
64 /**
65 * Internal string case mapping function type.
66 * All error checking must be done.
67 * src and dest must not overlap.
68 * @internal
69 */
70 typedef int32_t U_CALLCONV
71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
72 #if !UCONFIG_NO_BREAK_ITERATION
73 icu::BreakIterator *iter,
74 #endif
75 char16_t *dest, int32_t destCapacity,
76 const char16_t *src, int32_t srcLength,
77 icu::Edits *edits,
78 UErrorCode &errorCode);
79
80 U_NAMESPACE_BEGIN
81
82 class Locale; // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable; // unicode/appendable.h
85
86 /* The <iostream> include has been moved to unicode/ustream.h */
87
88 /**
89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
90 * which constructs a Unicode string from an invariant-character char * string.
91 * About invariant characters see utypes.h.
92 * This constructor has no runtime dependency on conversion code and is
93 * therefore recommended over ones taking a charset name string
94 * (where the empty string "" indicates invariant-character conversion).
95 *
96 * @stable ICU 3.2
97 */
98 #define US_INV icu::UnicodeString::kInvariant
99
100 /**
101 * \def UNICODE_STRING
102 * Obsolete macro approximating UnicodeString literals.
103 *
104 * Prior to the availability of C++11 and u"UTF-16 string literals",
105 * this macro was provided for portability and efficiency when
106 * initializing UnicodeStrings from literals.
107 *
108 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
109 * length determination:
110 * \code
111 * UnicodeString str(u"literal");
112 * if (str == u"other literal") { ... }
113 * \endcode
114 *
115 * The string parameter must be a C string literal.
116 * The length of the string, not including the terminating
117 * `NUL`, must be specified as a constant.
118 * @stable ICU 2.0
119 */
120 #if !U_CHAR16_IS_TYPEDEF
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122 #else
123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124 #endif
125
126 /**
127 * Unicode String literals in C++.
128 * Obsolete macro approximating UnicodeString literals.
129 * See UNICODE_STRING.
130 *
131 * The string parameter must be a C string literal.
132 * @stable ICU 2.0
133 * @see UNICODE_STRING
134 */
135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136
137 /**
138 * \def UNISTR_FROM_CHAR_EXPLICIT
139 * This can be defined to be empty or "explicit".
140 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
141 * constructors are marked as explicit, preventing their inadvertent use.
142 * @stable ICU 49
143 */
144 #ifndef UNISTR_FROM_CHAR_EXPLICIT
145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146 // Auto-"explicit" in ICU library code.
147 # define UNISTR_FROM_CHAR_EXPLICIT explicit
148 # else
149 // Empty by default for source code compatibility.
150 # define UNISTR_FROM_CHAR_EXPLICIT
151 # endif
152 #endif
153
154 /**
155 * \def UNISTR_FROM_STRING_EXPLICIT
156 * This can be defined to be empty or "explicit".
157 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
158 * constructors are marked as explicit, preventing their inadvertent use.
159 *
160 * In particular, this helps prevent accidentally depending on ICU conversion code
161 * by passing a string literal into an API with a const UnicodeString & parameter.
162 * @stable ICU 49
163 */
164 #ifndef UNISTR_FROM_STRING_EXPLICIT
165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166 // Auto-"explicit" in ICU library code.
167 # define UNISTR_FROM_STRING_EXPLICIT explicit
168 # else
169 // Empty by default for source code compatibility.
170 # define UNISTR_FROM_STRING_EXPLICIT
171 # endif
172 #endif
173
174 /**
175 * \def UNISTR_OBJECT_SIZE
176 * Desired sizeof(UnicodeString) in bytes.
177 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
178 * The object size may want to be a multiple of 16 bytes,
179 * which is a common granularity for heap allocation.
180 *
181 * Any space inside the object beyond sizeof(vtable pointer) + 2
182 * is available for storing short strings inside the object.
183 * The bigger the object, the longer a string that can be stored inside the object,
184 * without additional heap allocation.
185 *
186 * Depending on a platform's pointer size, pointer alignment requirements,
187 * and struct padding, the compiler will usually round up sizeof(UnicodeString)
188 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
189 * to hold the fields for heap-allocated strings.
190 * Such a minimum size also ensures that the object is easily large enough
191 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
192 *
193 * sizeof(UnicodeString) >= 48 should work for all known platforms.
194 *
195 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
196 * sizeof(UnicodeString) = 64 would leave space for
197 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
198 * char16_ts stored inside the object.
199 *
200 * The minimum object size on a 64-bit machine would be
201 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
202 * and the internal buffer would hold up to 11 char16_ts in that case.
203 *
204 * @see U16_MAX_LENGTH
205 * @stable ICU 56
206 */
207 #ifndef UNISTR_OBJECT_SIZE
208 # define UNISTR_OBJECT_SIZE 64
209 #endif
210
211 /**
212 * UnicodeString is a string class that stores Unicode characters directly and provides
213 * similar functionality as the Java String and StringBuffer/StringBuilder classes.
214 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
215 *
216 * The UnicodeString equivalent of std::string’s clear() is remove().
217 *
218 * A UnicodeString may "alias" an external array of characters
219 * (that is, point to it, rather than own the array)
220 * whose lifetime must then at least match the lifetime of the aliasing object.
221 * This aliasing may be preserved when returning a UnicodeString by value,
222 * depending on the compiler and the function implementation,
223 * via Return Value Optimization (RVO) or the move assignment operator.
224 * (However, the copy assignment operator does not preserve aliasing.)
225 * For details see the description of storage models at the end of the class API docs
226 * and in the User Guide chapter linked from there.
227 *
228 * The UnicodeString class is not suitable for subclassing.
229 *
230 * For an overview of Unicode strings in C and C++ see the
231 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
232 *
233 * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
234 * A Unicode character may be stored with either one code unit
235 * (the most common case) or with a matched pair of special code units
236 * ("surrogates"). The data type for code units is char16_t.
237 * For single-character handling, a Unicode character code *point* is a value
238 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
239 *
240 * Indexes and offsets into and lengths of strings always count code units, not code points.
241 * This is the same as with multi-byte char* strings in traditional string handling.
242 * Operations on partial strings typically do not test for code point boundaries.
243 * If necessary, the user needs to take care of such boundaries by testing for the code unit
244 * values or by using functions like
245 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
246 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
247 *
248 * UnicodeString methods are more lenient with regard to input parameter values
249 * than other ICU APIs. In particular:
250 * - If indexes are out of bounds for a UnicodeString object
251 * (< 0 or > length()) then they are "pinned" to the nearest boundary.
252 * - If the buffer passed to an insert/append/replace operation is owned by the
253 * target object, e.g., calling str.append(str), an extra copy may take place
254 * to ensure safety.
255 * - If primitive string pointer values (e.g., const char16_t * or char *)
256 * for input strings are nullptr, then those input string parameters are treated
257 * as if they pointed to an empty string.
258 * However, this is *not* the case for char * parameters for charset names
259 * or other IDs.
260 * - Most UnicodeString methods do not take a UErrorCode parameter because
261 * there are usually very few opportunities for failure other than a shortage
262 * of memory, error codes in low-level C++ string methods would be inconvenient,
263 * and the error code as the last parameter (ICU convention) would prevent
264 * the use of default parameter values.
265 * Instead, such methods set the UnicodeString into a "bogus" state
266 * (see isBogus()) if an error occurs.
267 *
268 * In string comparisons, two UnicodeString objects that are both "bogus"
269 * compare equal (to be transitive and prevent endless loops in sorting),
270 * and a "bogus" string compares less than any non-"bogus" one.
271 *
272 * Const UnicodeString methods are thread-safe. Multiple threads can use
273 * const methods on the same UnicodeString object simultaneously,
274 * but non-const methods must not be called concurrently (in multiple threads)
275 * with any other (const or non-const) methods.
276 *
277 * Similarly, const UnicodeString & parameters are thread-safe.
278 * One object may be passed in as such a parameter concurrently in multiple threads.
279 * This includes the const UnicodeString & parameters for
280 * copy construction, assignment, and cloning.
281 *
282 * UnicodeString uses several storage methods.
283 * String contents can be stored inside the UnicodeString object itself,
284 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
285 * Most of this is done transparently, but careful aliasing in particular provides
286 * significant performance improvements.
287 * Also, the internal buffer is accessible via special functions.
288 * For details see the
289 * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
290 *
291 * @see utf.h
292 * @see CharacterIterator
293 * @stable ICU 2.0
294 */
295 class U_COMMON_API UnicodeString : public Replaceable
296 {
297 public:
298
299 /**
300 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
301 * which constructs a Unicode string from an invariant-character char * string.
302 * Use the macro US_INV instead of the full qualification for this value.
303 *
304 * @see US_INV
305 * @stable ICU 3.2
306 */
enum EInvariant {
    /**
     * The only value of this enum; passing it selects the constructor
     * that reads an invariant-character char * string.
     * @see EInvariant
     * @stable ICU 3.2
     */
    kInvariant
};
314
315 //========================================
316 // Read-only operations
317 //========================================
318
319 /* Comparison - bitwise only - for international comparison use collation */
320
321 /**
322 * Equality operator. Performs only bitwise comparison.
323 * @param text The UnicodeString to compare to this one.
324 * @return true if `text` contains the same characters as this one,
325 * false otherwise.
326 * @stable ICU 2.0
327 */
328 inline bool operator== (const UnicodeString& text) const;
329
330 #ifndef U_HIDE_DRAFT_API
331 /**
332 * Equality operator. Performs only bitwise comparison with `text`
333 * which is, or which is implicitly convertible to,
334 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
335 *
336 * For performance, you can use UTF-16 string literals with compile-time
337 * length determination:
338 * \code
339 * UnicodeString str = ...;
340 * if (str == u"literal") { ... }
341 * \endcode
342 * @param text The string view to compare to this string.
343 * @return true if `text` contains the same characters as this one, false otherwise.
344 * @draft ICU 76
345 */
346 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
347 inline bool operator==(const S &text) const {
348 std::u16string_view sv(internal::toU16StringView(text));
349 uint32_t len; // unsigned to avoid a compiler warning
350 return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
351 }
352 #endif // U_HIDE_DRAFT_API
353
354 /**
355 * Inequality operator. Performs only bitwise comparison.
356 * @param text The UnicodeString to compare to this one.
357 * @return false if `text` contains the same characters as this one,
358 * true otherwise.
359 * @stable ICU 2.0
360 */
361 inline bool operator!= (const UnicodeString& text) const;
362
363 #ifndef U_HIDE_DRAFT_API
364 /**
365 * Inequality operator. Performs only bitwise comparison with `text`
366 * which is, or which is implicitly convertible to,
367 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
368 *
369 * For performance, you can use std::u16string_view literals with compile-time
370 * length determination:
371 * \code
372 * #include <string_view>
373 * using namespace std::string_view_literals;
374 * UnicodeString str = ...;
375 * if (str != u"literal"sv) { ... }
376 * \endcode
377 * @param text The string view to compare to this string.
378 * @return false if `text` contains the same characters as this one, true otherwise.
379 * @draft ICU 76
380 */
381 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
382 inline bool operator!=(const S &text) const {
383 return !operator==(text);
384 }
385 #endif // U_HIDE_DRAFT_API
386
387 /**
388 * Greater than operator. Performs only bitwise comparison.
389 * @param text The UnicodeString to compare to this one.
390 * @return true if the characters in this are bitwise
391 * greater than the characters in `text`, false otherwise
392 * @stable ICU 2.0
393 */
394 inline UBool operator> (const UnicodeString& text) const;
395
396 /**
397 * Less than operator. Performs only bitwise comparison.
398 * @param text The UnicodeString to compare to this one.
399 * @return true if the characters in this are bitwise
400 * less than the characters in `text`, false otherwise
401 * @stable ICU 2.0
402 */
403 inline UBool operator< (const UnicodeString& text) const;
404
405 /**
406 * Greater than or equal operator. Performs only bitwise comparison.
407 * @param text The UnicodeString to compare to this one.
408 * @return true if the characters in this are bitwise
409 * greater than or equal to the characters in `text`, false otherwise
410 * @stable ICU 2.0
411 */
412 inline UBool operator>= (const UnicodeString& text) const;
413
414 /**
415 * Less than or equal operator. Performs only bitwise comparison.
416 * @param text The UnicodeString to compare to this one.
417 * @return true if the characters in this are bitwise
418 * less than or equal to the characters in `text`, false otherwise
419 * @stable ICU 2.0
420 */
421 inline UBool operator<= (const UnicodeString& text) const;
422
423 /**
424 * Compare the characters bitwise in this UnicodeString to
425 * the characters in `text`.
426 * @param text The UnicodeString to compare to this one.
427 * @return The result of bitwise character comparison: 0 if this
428 * contains the same characters as `text`, -1 if the characters in
429 * this are bitwise less than the characters in `text`, +1 if the
430 * characters in this are bitwise greater than the characters
431 * in `text`.
432 * @stable ICU 2.0
433 */
434 inline int8_t compare(const UnicodeString& text) const;
435
436 /**
437 * Compare the characters bitwise in the range
438 * [`start`, `start + length`) with the characters
439 * in the **entire string** `text`.
440 * (The parameters "start" and "length" are not applied to the other text "text".)
441 * @param start the offset at which the compare operation begins
442 * @param length the number of characters in this to compare.
443 * @param text the other text to be compared against this string.
444 * @return The result of bitwise character comparison: 0 if this
445 * contains the same characters as `text`, -1 if the characters in
446 * this are bitwise less than the characters in `text`, +1 if the
447 * characters in this are bitwise greater than the characters
448 * in `text`.
449 * @stable ICU 2.0
450 */
451 inline int8_t compare(int32_t start,
452 int32_t length,
453 const UnicodeString& text) const;
454
455 /**
456 * Compare the characters bitwise in the range
457 * [`start`, `start + length`) with the characters
458 * in `srcText` in the range
459 * [`srcStart`, `srcStart + srcLength`).
460 * @param start the offset at which the compare operation begins
461 * @param length the number of characters in this to compare.
462 * @param srcText the text to be compared
463 * @param srcStart the offset into `srcText` to start comparison
464 * @param srcLength the number of characters in `srcText` to compare
465 * @return The result of bitwise character comparison: 0 if this
466 * contains the same characters as `srcText`, -1 if the characters in
467 * this are bitwise less than the characters in `srcText`, +1 if the
468 * characters in this are bitwise greater than the characters
469 * in `srcText`.
470 * @stable ICU 2.0
471 */
472 inline int8_t compare(int32_t start,
473 int32_t length,
474 const UnicodeString& srcText,
475 int32_t srcStart,
476 int32_t srcLength) const;
477
478 /**
479 * Compare the characters bitwise in this UnicodeString with the first
480 * `srcLength` characters in `srcChars`.
481 * @param srcChars The characters to compare to this UnicodeString.
482 * @param srcLength the number of characters in `srcChars` to compare
483 * @return The result of bitwise character comparison: 0 if this
484 * contains the same characters as `srcChars`, -1 if the characters in
485 * this are bitwise less than the characters in `srcChars`, +1 if the
486 * characters in this are bitwise greater than the characters
487 * in `srcChars`.
488 * @stable ICU 2.0
489 */
490 inline int8_t compare(ConstChar16Ptr srcChars,
491 int32_t srcLength) const;
492
493 /**
494 * Compare the characters bitwise in the range
495 * [`start`, `start + length`) with the first
496 * `length` characters in `srcChars`
497 * @param start the offset at which the compare operation begins
498 * @param length the number of characters to compare.
499 * @param srcChars the characters to be compared
500 * @return The result of bitwise character comparison: 0 if this
501 * contains the same characters as `srcChars`, -1 if the characters in
502 * this are bitwise less than the characters in `srcChars`, +1 if the
503 * characters in this are bitwise greater than the characters
504 * in `srcChars`.
505 * @stable ICU 2.0
506 */
507 inline int8_t compare(int32_t start,
508 int32_t length,
509 const char16_t *srcChars) const;
510
511 /**
512 * Compare the characters bitwise in the range
513 * [`start`, `start + length`) with the characters
514 * in `srcChars` in the range
515 * [`srcStart`, `srcStart + srcLength`).
516 * @param start the offset at which the compare operation begins
517 * @param length the number of characters in this to compare
518 * @param srcChars the characters to be compared
519 * @param srcStart the offset into `srcChars` to start comparison
520 * @param srcLength the number of characters in `srcChars` to compare
521 * @return The result of bitwise character comparison: 0 if this
522 * contains the same characters as `srcChars`, -1 if the characters in
523 * this are bitwise less than the characters in `srcChars`, +1 if the
524 * characters in this are bitwise greater than the characters
525 * in `srcChars`.
526 * @stable ICU 2.0
527 */
528 inline int8_t compare(int32_t start,
529 int32_t length,
530 const char16_t *srcChars,
531 int32_t srcStart,
532 int32_t srcLength) const;
533
534 /**
535 * Compare the characters bitwise in the range
536 * [`start`, `limit`) with the characters
537 * in `srcText` in the range
538 * [`srcStart`, `srcLimit`).
539 * @param start the offset at which the compare operation begins
540 * @param limit the offset immediately following the compare operation
541 * @param srcText the text to be compared
542 * @param srcStart the offset into `srcText` to start comparison
543 * @param srcLimit the offset into `srcText` to limit comparison
544 * @return The result of bitwise character comparison: 0 if this
545 * contains the same characters as `srcText`, -1 if the characters in
546 * this are bitwise less than the characters in `srcText`, +1 if the
547 * characters in this are bitwise greater than the characters
548 * in `srcText`.
549 * @stable ICU 2.0
550 */
551 inline int8_t compareBetween(int32_t start,
552 int32_t limit,
553 const UnicodeString& srcText,
554 int32_t srcStart,
555 int32_t srcLimit) const;
556
557 /**
558 * Compare two Unicode strings in code point order.
559 * The result may be different from the results of compare(), operator<, etc.
560 * if supplementary characters are present:
561 *
562 * In UTF-16, supplementary characters (with code points U+10000 and above) are
563 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
564 * which means that they compare as less than some other BMP characters like U+feff.
565 * This function compares Unicode strings in code point order.
566 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
567 *
568 * @param text Another string to compare this one to.
569 * @return a negative/zero/positive integer corresponding to whether
570 * this string is less than/equal to/greater than the second one
571 * in code point order
572 * @stable ICU 2.0
573 */
574 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
575
576 /**
577 * Compare two Unicode strings in code point order.
578 * The result may be different from the results of compare(), operator<, etc.
579 * if supplementary characters are present:
580 *
581 * In UTF-16, supplementary characters (with code points U+10000 and above) are
582 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
583 * which means that they compare as less than some other BMP characters like U+feff.
584 * This function compares Unicode strings in code point order.
585 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
586 *
587 * @param start The start offset in this string at which the compare operation begins.
588 * @param length The number of code units from this string to compare.
589 * @param srcText Another string to compare this one to.
590 * @return a negative/zero/positive integer corresponding to whether
591 * this string is less than/equal to/greater than the second one
592 * in code point order
593 * @stable ICU 2.0
594 */
595 inline int8_t compareCodePointOrder(int32_t start,
596 int32_t length,
597 const UnicodeString& srcText) const;
598
599 /**
600 * Compare two Unicode strings in code point order.
601 * The result may be different from the results of compare(), operator<, etc.
602 * if supplementary characters are present:
603 *
604 * In UTF-16, supplementary characters (with code points U+10000 and above) are
605 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
606 * which means that they compare as less than some other BMP characters like U+feff.
607 * This function compares Unicode strings in code point order.
608 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
609 *
610 * @param start The start offset in this string at which the compare operation begins.
611 * @param length The number of code units from this string to compare.
612 * @param srcText Another string to compare this one to.
613 * @param srcStart The start offset in that string at which the compare operation begins.
614 * @param srcLength The number of code units from that string to compare.
615 * @return a negative/zero/positive integer corresponding to whether
616 * this string is less than/equal to/greater than the second one
617 * in code point order
618 * @stable ICU 2.0
619 */
620 inline int8_t compareCodePointOrder(int32_t start,
621 int32_t length,
622 const UnicodeString& srcText,
623 int32_t srcStart,
624 int32_t srcLength) const;
625
626 /**
627 * Compare two Unicode strings in code point order.
628 * The result may be different from the results of compare(), operator<, etc.
629 * if supplementary characters are present:
630 *
631 * In UTF-16, supplementary characters (with code points U+10000 and above) are
632 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
633 * which means that they compare as less than some other BMP characters like U+feff.
634 * This function compares Unicode strings in code point order.
635 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
636 *
637 * @param srcChars A pointer to another string to compare this one to.
638 * @param srcLength The number of code units from that string to compare.
639 * @return a negative/zero/positive integer corresponding to whether
640 * this string is less than/equal to/greater than the second one
641 * in code point order
642 * @stable ICU 2.0
643 */
644 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
645 int32_t srcLength) const;
646
647 /**
648 * Compare two Unicode strings in code point order.
649 * The result may be different from the results of compare(), operator<, etc.
650 * if supplementary characters are present:
651 *
652 * In UTF-16, supplementary characters (with code points U+10000 and above) are
653 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
654 * which means that they compare as less than some other BMP characters like U+feff.
655 * This function compares Unicode strings in code point order.
656 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
657 *
658 * @param start The start offset in this string at which the compare operation begins.
659 * @param length The number of code units from this string to compare.
660 * @param srcChars A pointer to another string to compare this one to.
661 * @return a negative/zero/positive integer corresponding to whether
662 * this string is less than/equal to/greater than the second one
663 * in code point order
664 * @stable ICU 2.0
665 */
666 inline int8_t compareCodePointOrder(int32_t start,
667 int32_t length,
668 const char16_t *srcChars) const;
669
670 /**
671 * Compare two Unicode strings in code point order.
672 * The result may be different from the results of compare(), operator<, etc.
673 * if supplementary characters are present:
674 *
675 * In UTF-16, supplementary characters (with code points U+10000 and above) are
676 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
677 * which means that they compare as less than some other BMP characters like U+feff.
678 * This function compares Unicode strings in code point order.
679 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
680 *
681 * @param start The start offset in this string at which the compare operation begins.
682 * @param length The number of code units from this string to compare.
683 * @param srcChars A pointer to another string to compare this one to.
684 * @param srcStart The start offset in that string at which the compare operation begins.
685 * @param srcLength The number of code units from that string to compare.
686 * @return a negative/zero/positive integer corresponding to whether
687 * this string is less than/equal to/greater than the second one
688 * in code point order
689 * @stable ICU 2.0
690 */
691 inline int8_t compareCodePointOrder(int32_t start,
692 int32_t length,
693 const char16_t *srcChars,
694 int32_t srcStart,
695 int32_t srcLength) const;
696
697 /**
698 * Compare two Unicode strings in code point order.
699 * The result may be different from the results of compare(), operator<, etc.
700 * if supplementary characters are present:
701 *
702 * In UTF-16, supplementary characters (with code points U+10000 and above) are
703 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
704 * which means that they compare as less than some other BMP characters like U+feff.
705 * This function compares Unicode strings in code point order.
706 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
707 *
708 * @param start The start offset in this string at which the compare operation begins.
709 * @param limit The offset after the last code unit from this string to compare.
710 * @param srcText Another string to compare this one to.
711 * @param srcStart The start offset in that string at which the compare operation begins.
712 * @param srcLimit The offset after the last code unit from that string to compare.
713 * @return a negative/zero/positive integer corresponding to whether
714 * this string is less than/equal to/greater than the second one
715 * in code point order
716 * @stable ICU 2.0
717 */
718 inline int8_t compareCodePointOrderBetween(int32_t start,
719 int32_t limit,
720 const UnicodeString& srcText,
721 int32_t srcStart,
722 int32_t srcLimit) const;
723
724 /**
725 * Compare this string with `text` case-insensitively using full case folding.
726 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
727 *
728 * @param text Another string to compare this one to.
729 * @param options A bit set of options:
730 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
731 * Comparison in code unit order with default case folding.
732 *
733 * - U_COMPARE_CODE_POINT_ORDER
734 * Set to choose code point order instead of code unit order
735 * (see u_strCompare for details).
736 *
737 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
738 *
739 * @return A negative, zero, or positive integer indicating the comparison result.
740 * @stable ICU 2.0
741 */
742 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
743
744 /**
745 * Compare a range of this string with `srcText` case-insensitively using full case folding.
746 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
747 *
748 * @param start The start offset in this string at which the compare operation begins.
749 * @param length The number of code units from this string to compare.
750 * @param srcText Another string to compare this one to.
751 * @param options A bit set of options:
752 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
753 * Comparison in code unit order with default case folding.
754 *
755 * - U_COMPARE_CODE_POINT_ORDER
756 * Set to choose code point order instead of code unit order
757 * (see u_strCompare for details).
758 *
759 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
760 *
761 * @return A negative, zero, or positive integer indicating the comparison result.
762 * @stable ICU 2.0
763 */
764 inline int8_t caseCompare(int32_t start,
765 int32_t length,
766 const UnicodeString& srcText,
767 uint32_t options) const;
768
769 /**
770 * Compare a range of this string with a range of `srcText` case-insensitively using full case folding.
771 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
772 *
773 * @param start The start offset in this string at which the compare operation begins.
774 * @param length The number of code units from this string to compare.
775 * @param srcText Another string to compare this one to.
776 * @param srcStart The start offset in that string at which the compare operation begins.
777 * @param srcLength The number of code units from that string to compare.
778 * @param options A bit set of options:
779 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
780 * Comparison in code unit order with default case folding.
781 *
782 * - U_COMPARE_CODE_POINT_ORDER
783 * Set to choose code point order instead of code unit order
784 * (see u_strCompare for details).
785 *
786 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
787 *
788 * @return A negative, zero, or positive integer indicating the comparison result.
789 * @stable ICU 2.0
790 */
791 inline int8_t caseCompare(int32_t start,
792 int32_t length,
793 const UnicodeString& srcText,
794 int32_t srcStart,
795 int32_t srcLength,
796 uint32_t options) const;
797
798 /**
799 * Compare this string with the `srcChars` text case-insensitively using full case folding.
800 * This is equivalent to case-folding both this string and the `srcChars` text with `options` and then calling compare().
801 *
802 * @param srcChars A pointer to another string to compare this one to.
803 * @param srcLength The number of code units from that string to compare.
804 * @param options A bit set of options:
805 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
806 * Comparison in code unit order with default case folding.
807 *
808 * - U_COMPARE_CODE_POINT_ORDER
809 * Set to choose code point order instead of code unit order
810 * (see u_strCompare for details).
811 *
812 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
813 *
814 * @return A negative, zero, or positive integer indicating the comparison result.
815 * @stable ICU 2.0
816 */
817 inline int8_t caseCompare(ConstChar16Ptr srcChars,
818 int32_t srcLength,
819 uint32_t options) const;
820
821 /**
822 * Compare a range of this string with the `srcChars` text case-insensitively using full case folding.
823 * This is equivalent to case-folding both this string and the `srcChars` text with `options` and then calling compare().
824 *
825 * @param start The start offset in this string at which the compare operation begins.
826 * @param length The number of code units from this string to compare.
827 * @param srcChars A pointer to another string to compare this one to.
828 * @param options A bit set of options:
829 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
830 * Comparison in code unit order with default case folding.
831 *
832 * - U_COMPARE_CODE_POINT_ORDER
833 * Set to choose code point order instead of code unit order
834 * (see u_strCompare for details).
835 *
836 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
837 *
838 * @return A negative, zero, or positive integer indicating the comparison result.
839 * @stable ICU 2.0
840 */
841 inline int8_t caseCompare(int32_t start,
842 int32_t length,
843 const char16_t *srcChars,
844 uint32_t options) const;
845
846 /**
847 * Compare a range of this string with a range of the `srcChars` text case-insensitively using full case folding.
848 * This is equivalent to case-folding both this string and the `srcChars` text with `options` and then calling compare().
849 *
850 * @param start The start offset in this string at which the compare operation begins.
851 * @param length The number of code units from this string to compare.
852 * @param srcChars A pointer to another string to compare this one to.
853 * @param srcStart The start offset in that string at which the compare operation begins.
854 * @param srcLength The number of code units from that string to compare.
855 * @param options A bit set of options:
856 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
857 * Comparison in code unit order with default case folding.
858 *
859 * - U_COMPARE_CODE_POINT_ORDER
860 * Set to choose code point order instead of code unit order
861 * (see u_strCompare for details).
862 *
863 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
864 *
865 * @return A negative, zero, or positive integer indicating the comparison result.
866 * @stable ICU 2.0
867 */
868 inline int8_t caseCompare(int32_t start,
869 int32_t length,
870 const char16_t *srcChars,
871 int32_t srcStart,
872 int32_t srcLength,
873 uint32_t options) const;
874
875 /**
876 * Compare a range of this string with a range of `srcText` case-insensitively using full case folding.
877 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
878 *
879 * @param start The start offset in this string at which the compare operation begins.
880 * @param limit The offset after the last code unit from this string to compare.
881 * @param srcText Another string to compare this one to.
882 * @param srcStart The start offset in that string at which the compare operation begins.
883 * @param srcLimit The offset after the last code unit from that string to compare.
884 * @param options A bit set of options:
885 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
886 * Comparison in code unit order with default case folding.
887 *
888 * - U_COMPARE_CODE_POINT_ORDER
889 * Set to choose code point order instead of code unit order
890 * (see u_strCompare for details).
891 *
892 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
893 *
894 * @return A negative, zero, or positive integer indicating the comparison result.
895 * @stable ICU 2.0
896 */
897 inline int8_t caseCompareBetween(int32_t start,
898 int32_t limit,
899 const UnicodeString& srcText,
900 int32_t srcStart,
901 int32_t srcLimit,
902 uint32_t options) const;
903
904 /**
905 * Determine if this starts with the characters in `text`.
906 * @param text The text to match.
907 * @return true if this starts with the characters in `text`,
908 * false otherwise.
909 * @stable ICU 2.0
910 */
911 inline UBool startsWith(const UnicodeString& text) const;
912
913 /**
914 * Determine if this starts with the characters in `srcText`
915 * in the range [`srcStart`, `srcStart + srcLength`).
916 * @param srcText The text to match.
917 * @param srcStart the offset into `srcText` to start matching
918 * @param srcLength the number of characters in `srcText` to match
919 * @return true if this starts with the characters in that range of `srcText`,
920 * false otherwise
921 * @stable ICU 2.0
922 */
923 inline UBool startsWith(const UnicodeString& srcText,
924 int32_t srcStart,
925 int32_t srcLength) const;
926
927 /**
928 * Determine if this starts with the characters in `srcChars`.
929 * @param srcChars The characters to match.
930 * @param srcLength the number of characters in `srcChars`
931 * @return true if this starts with the characters in `srcChars`,
932 * false otherwise.
933 * @stable ICU 2.0
934 */
935 inline UBool startsWith(ConstChar16Ptr srcChars,
936 int32_t srcLength) const;
937
938 /**
939 * Determine if this starts with the characters in `srcChars`
940 * in the range [`srcStart`, `srcStart + srcLength`).
941 * @param srcChars The characters to match.
942 * @param srcStart the offset into `srcChars` to start matching
943 * @param srcLength the number of characters in `srcChars` to match
944 * @return true if this starts with the characters in that range of `srcChars`, false otherwise
945 * @stable ICU 2.0
946 */
947 inline UBool startsWith(const char16_t *srcChars,
948 int32_t srcStart,
949 int32_t srcLength) const;
950
951 /**
952 * Determine if this ends with the characters in `text`.
953 * @param text The text to match.
954 * @return true if this ends with the characters in `text`,
955 * false otherwise.
956 * @stable ICU 2.0
957 */
958 inline UBool endsWith(const UnicodeString& text) const;
959
960 /**
961 * Determine if this ends with the characters in `srcText`
962 * in the range [`srcStart`, `srcStart + srcLength`).
963 * @param srcText The text to match.
964 * @param srcStart the offset into `srcText` to start matching
965 * @param srcLength the number of characters in `srcText` to match
966 * @return true if this ends with the characters in that range of `srcText`,
967 * false otherwise
968 * @stable ICU 2.0
969 */
970 inline UBool endsWith(const UnicodeString& srcText,
971 int32_t srcStart,
972 int32_t srcLength) const;
973
974 /**
975 * Determine if this ends with the characters in `srcChars`.
976 * @param srcChars The characters to match.
977 * @param srcLength the number of characters in `srcChars`
978 * @return true if this ends with the characters in `srcChars`,
979 * false otherwise.
980 * @stable ICU 2.0
981 */
982 inline UBool endsWith(ConstChar16Ptr srcChars,
983 int32_t srcLength) const;
984
985 /**
986 * Determine if this ends with the characters in `srcChars`
987 * in the range [`srcStart`, `srcStart + srcLength`).
988 * @param srcChars The characters to match.
989 * @param srcStart the offset into `srcChars` to start matching
990 * @param srcLength the number of characters in `srcChars` to match
991 * @return true if this ends with the characters in that range of `srcChars`,
992 * false otherwise
993 * @stable ICU 2.0
994 */
995 inline UBool endsWith(const char16_t *srcChars,
996 int32_t srcStart,
997 int32_t srcLength) const;
998
999
1000 /* Searching - bitwise only */
1001
1002 /**
1003 * Locate in this the first occurrence of the characters in `text`,
1004 * using bitwise comparison.
1005 * @param text The text to search for.
1006 * @return The offset into this of the start of the first match of `text`,
1007 * or -1 if not found.
1008 * @stable ICU 2.0
1009 */
1010 inline int32_t indexOf(const UnicodeString& text) const;
1011
1012 /**
1013 * Locate in this the first occurrence of the characters in `text`
1014 * starting at offset `start`, using bitwise comparison.
1015 * @param text The text to search for.
1016 * @param start The offset into this at which searching will start.
1017 * @return The offset into this of the start of the first match of `text`,
1018 * or -1 if not found.
1019 * @stable ICU 2.0
1020 */
1021 inline int32_t indexOf(const UnicodeString& text,
1022 int32_t start) const;
1023
1024 /**
1025 * Locate in this the first occurrence in the range
1026 * [`start`, `start + length`) of the characters
1027 * in `text`, using bitwise comparison.
1028 * @param text The text to search for.
1029 * @param start The offset into this at which searching will start.
1030 * @param length The number of characters in this to search.
1031 * @return The offset into this of the start of the first match of `text`,
1032 * or -1 if not found.
1033 * @stable ICU 2.0
1034 */
1035 inline int32_t indexOf(const UnicodeString& text,
1036 int32_t start,
1037 int32_t length) const;
1038
1039 /**
1040 * Locate in this the first occurrence in the range
1041 * [`start`, `start + length`) of the characters
1042 * in `srcText` in the range
1043 * [`srcStart`, `srcStart + srcLength`),
1044 * using bitwise comparison.
1045 * @param srcText The text to search for.
1046 * @param srcStart the offset into `srcText` at which
1047 * to start matching
1048 * @param srcLength the number of characters in `srcText` to match
1049 * @param start the offset into this at which to start matching
1050 * @param length the number of characters in this to search
1051 * @return The offset into this of the start of the first match of that range of `srcText`,
1052 * or -1 if not found.
1053 * @stable ICU 2.0
1054 */
1055 inline int32_t indexOf(const UnicodeString& srcText,
1056 int32_t srcStart,
1057 int32_t srcLength,
1058 int32_t start,
1059 int32_t length) const;
1060
1061 /**
1062 * Locate in this the first occurrence of the characters in
1063 * `srcChars`
1064 * starting at offset `start`, using bitwise comparison.
1065 * @param srcChars The text to search for.
1066 * @param srcLength the number of characters in `srcChars` to match
1067 * @param start the offset into this at which to start matching
1068 * @return The offset into this of the start of the first match of `srcChars`,
1069 * or -1 if not found.
1070 * @stable ICU 2.0
1071 */
1072 inline int32_t indexOf(const char16_t *srcChars,
1073 int32_t srcLength,
1074 int32_t start) const;
1075
1076 /**
1077 * Locate in this the first occurrence in the range
1078 * [`start`, `start + length`) of the characters
1079 * in `srcChars`, using bitwise comparison.
1080 * @param srcChars The text to search for.
1081 * @param srcLength the number of characters in `srcChars`
1082 * @param start The offset into this at which searching will start.
1083 * @param length The number of characters in this to search.
1084 * @return The offset into this of the start of the first match of `srcChars`,
1085 * or -1 if not found.
1086 * @stable ICU 2.0
1087 */
1088 inline int32_t indexOf(ConstChar16Ptr srcChars,
1089 int32_t srcLength,
1090 int32_t start,
1091 int32_t length) const;
1092
1093 /**
1094 * Locate in this the first occurrence in the range
1095 * [`start`, `start + length`) of the characters
1096 * in `srcChars` in the range
1097 * [`srcStart`, `srcStart + srcLength`),
1098 * using bitwise comparison.
1099 * @param srcChars The text to search for.
1100 * @param srcStart the offset into `srcChars` at which
1101 * to start matching
1102 * @param srcLength the number of characters in `srcChars` to match
1103 * @param start the offset into this at which to start matching
1104 * @param length the number of characters in this to search
1105 * @return The offset into this of the start of the first match of that range of `srcChars`,
1106 * or -1 if not found.
1107 * @stable ICU 2.0
1108 */
1109 int32_t indexOf(const char16_t *srcChars,
1110 int32_t srcStart,
1111 int32_t srcLength,
1112 int32_t start,
1113 int32_t length) const;
1114
1115 /**
1116 * Locate in this the first occurrence of the BMP code point `c`,
1117 * using bitwise comparison.
1118 * @param c The code unit to search for.
1119 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1120 * @stable ICU 2.0
1121 */
1122 inline int32_t indexOf(char16_t c) const;
1123
1124 /**
1125 * Locate in this the first occurrence of the code point `c`,
1126 * using bitwise comparison.
1127 *
1128 * @param c The code point to search for.
1129 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1130 * @stable ICU 2.0
1131 */
1132 inline int32_t indexOf(UChar32 c) const;
1133
1134 /**
1135 * Locate in this the first occurrence of the BMP code point `c`,
1136 * starting at offset `start`, using bitwise comparison.
1137 * @param c The code unit to search for.
1138 * @param start The offset into this at which searching will start.
1139 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1140 * @stable ICU 2.0
1141 */
1142 inline int32_t indexOf(char16_t c,
1143 int32_t start) const;
1144
1145 /**
1146 * Locate in this the first occurrence of the code point `c`
1147 * starting at offset `start`, using bitwise comparison.
1148 *
1149 * @param c The code point to search for.
1150 * @param start The offset into this at which searching will start.
1151 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1152 * @stable ICU 2.0
1153 */
1154 inline int32_t indexOf(UChar32 c,
1155 int32_t start) const;
1156
1157 /**
1158 * Locate in this the first occurrence of the BMP code point `c`
1159 * in the range [`start`, `start + length`),
1160 * using bitwise comparison.
1161 * @param c The code unit to search for.
1162 * @param start the offset into this at which to start matching
1163 * @param length the number of characters in this to search
1164 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1165 * @stable ICU 2.0
1166 */
1167 inline int32_t indexOf(char16_t c,
1168 int32_t start,
1169 int32_t length) const;
1170
1171 /**
1172 * Locate in this the first occurrence of the code point `c`
1173 * in the range [`start`, `start + length`),
1174 * using bitwise comparison.
1175 *
1176 * @param c The code point to search for.
1177 * @param start the offset into this at which to start matching
1178 * @param length the number of characters in this to search
1179 * @return The offset into this of the first occurrence of `c`, or -1 if not found.
1180 * @stable ICU 2.0
1181 */
1182 inline int32_t indexOf(UChar32 c,
1183 int32_t start,
1184 int32_t length) const;
1185
1186 /**
1187 * Locate in this the last occurrence of the characters in `text`,
1188 * using bitwise comparison.
1189 * @param text The text to search for.
1190 * @return The offset into this of the start of the last match of `text`,
1191 * or -1 if not found.
1192 * @stable ICU 2.0
1193 */
1194 inline int32_t lastIndexOf(const UnicodeString& text) const;
1195
1196 /**
1197 * Locate in this the last occurrence of the characters in `text`
1198 * starting at offset `start`, using bitwise comparison.
1199 * @param text The text to search for.
1200 * @param start The offset into this at which searching will start.
1201 * @return The offset into this of the start of the last match of `text`,
1202 * or -1 if not found.
1203 * @stable ICU 2.0
1204 */
1205 inline int32_t lastIndexOf(const UnicodeString& text,
1206 int32_t start) const;
1207
1208 /**
1209 * Locate in this the last occurrence in the range
1210 * [`start`, `start + length`) of the characters
1211 * in `text`, using bitwise comparison.
1212 * @param text The text to search for.
1213 * @param start The offset into this at which searching will start.
1214 * @param length The number of characters in this to search.
1215 * @return The offset into this of the start of the last match of `text`,
1216 * or -1 if not found.
1217 * @stable ICU 2.0
1218 */
1219 inline int32_t lastIndexOf(const UnicodeString& text,
1220 int32_t start,
1221 int32_t length) const;
1222
1223 /**
1224 * Locate in this the last occurrence in the range
1225 * [`start`, `start + length`) of the characters
1226 * in `srcText` in the range
1227 * [`srcStart`, `srcStart + srcLength`),
1228 * using bitwise comparison.
1229 * @param srcText The text to search for.
1230 * @param srcStart the offset into `srcText` at which
1231 * to start matching
1232 * @param srcLength the number of characters in `srcText` to match
1233 * @param start the offset into this at which to start matching
1234 * @param length the number of characters in this to search
1235 * @return The offset into this of the start of the last match of that range of `srcText`,
1236 * or -1 if not found.
1237 * @stable ICU 2.0
1238 */
1239 inline int32_t lastIndexOf(const UnicodeString& srcText,
1240 int32_t srcStart,
1241 int32_t srcLength,
1242 int32_t start,
1243 int32_t length) const;
1244
1245 /**
1246 * Locate in this the last occurrence of the characters in `srcChars`
1247 * starting at offset `start`, using bitwise comparison.
1248 * @param srcChars The text to search for.
1249 * @param srcLength the number of characters in `srcChars` to match
1250 * @param start the offset into this at which to start matching
1251 * @return The offset into this of the start of the last match of `srcChars`,
1252 * or -1 if not found.
1253 * @stable ICU 2.0
1254 */
1255 inline int32_t lastIndexOf(const char16_t *srcChars,
1256 int32_t srcLength,
1257 int32_t start) const;
1258
1259 /**
1260 * Locate in this the last occurrence in the range
1261 * [`start`, `start + length`) of the characters
1262 * in `srcChars`, using bitwise comparison.
1263 * @param srcChars The text to search for.
1264 * @param srcLength the number of characters in `srcChars`
1265 * @param start The offset into this at which searching will start.
1266 * @param length The number of characters in this to search.
1267 * @return The offset into this of the start of the last match of `srcChars`,
1268 * or -1 if not found.
1269 * @stable ICU 2.0
1270 */
1271 inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1272 int32_t srcLength,
1273 int32_t start,
1274 int32_t length) const;
1275
1276 /**
1277 * Locate in this the last occurrence in the range
1278 * [`start`, `start + length`) of the characters
1279 * in `srcChars` in the range
1280 * [`srcStart`, `srcStart + srcLength`),
1281 * using bitwise comparison.
1282 * @param srcChars The text to search for.
1283 * @param srcStart the offset into `srcChars` at which
1284 * to start matching
1285 * @param srcLength the number of characters in `srcChars` to match
1286 * @param start the offset into this at which to start matching
1287 * @param length the number of characters in this to search
1288 * @return The offset into this of the start of the last match of that range of `srcChars`,
1289 * or -1 if not found.
1290 * @stable ICU 2.0
1291 */
1292 int32_t lastIndexOf(const char16_t *srcChars,
1293 int32_t srcStart,
1294 int32_t srcLength,
1295 int32_t start,
1296 int32_t length) const;
1297
1298 /**
1299 * Locate in this the last occurrence of the BMP code point `c`,
1300 * using bitwise comparison.
1301 * @param c The code unit to search for.
1302 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1303 * @stable ICU 2.0
1304 */
1305 inline int32_t lastIndexOf(char16_t c) const;
1306
1307 /**
1308 * Locate in this the last occurrence of the code point `c`,
1309 * using bitwise comparison.
1310 *
1311 * @param c The code point to search for.
1312 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1313 * @stable ICU 2.0
1314 */
1315 inline int32_t lastIndexOf(UChar32 c) const;
1316
1317 /**
1318 * Locate in this the last occurrence of the BMP code point `c`
1319 * starting at offset `start`, using bitwise comparison.
1320 * @param c The code unit to search for.
1321 * @param start The offset into this at which searching will start.
1322 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1323 * @stable ICU 2.0
1324 */
1325 inline int32_t lastIndexOf(char16_t c,
1326 int32_t start) const;
1327
1328 /**
1329 * Locate in this the last occurrence of the code point `c`
1330 * starting at offset `start`, using bitwise comparison.
1331 *
1332 * @param c The code point to search for.
1333 * @param start The offset into this at which searching will start.
1334 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1335 * @stable ICU 2.0
1336 */
1337 inline int32_t lastIndexOf(UChar32 c,
1338 int32_t start) const;
1339
1340 /**
1341 * Locate in this the last occurrence of the BMP code point `c`
1342 * in the range [`start`, `start + length`),
1343 * using bitwise comparison.
1344 * @param c The code unit to search for.
1345 * @param start the offset into this at which to start matching
1346 * @param length the number of characters in this to search
1347 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1348 * @stable ICU 2.0
1349 */
1350 inline int32_t lastIndexOf(char16_t c,
1351 int32_t start,
1352 int32_t length) const;
1353
1354 /**
1355 * Locate in this the last occurrence of the code point `c`
1356 * in the range [`start`, `start + length`),
1357 * using bitwise comparison.
1358 *
1359 * @param c The code point to search for.
1360 * @param start the offset into this at which to start matching
1361 * @param length the number of characters in this to search
1362 * @return The offset into this of the last occurrence of `c`, or -1 if not found.
1363 * @stable ICU 2.0
1364 */
1365 inline int32_t lastIndexOf(UChar32 c,
1366 int32_t start,
1367 int32_t length) const;
1368
1369
1370 /* Character access */
1371
1372 /**
1373 * Return the code unit at offset `offset`.
1374 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned.
1375 * @param offset a valid offset into the text
1376 * @return the code unit at offset `offset`
1377 * or 0xffff if the offset is not valid for this string
1378 * @stable ICU 2.0
1379 */
1380 inline char16_t charAt(int32_t offset) const;
1381
1382 /**
1383 * Return the code unit at offset `offset`.
1384 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned.
1385 * @param offset a valid offset into the text
1386 * @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string
1387 * @stable ICU 2.0
1388 */
1389 inline char16_t operator[] (int32_t offset) const;
1390
1391 /**
1392 * Return the code point that contains the code unit
1393 * at offset `offset`.
1394 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned.
1395 * @param offset a valid offset into the text
1396 * that indicates the text offset of any of the code units
1397 * that will be assembled into a code point (21-bit value) and returned
1398 * @return the code point of text at `offset`
1399 * or 0xffff if the offset is not valid for this string
1400 * @stable ICU 2.0
1401 */
1402 UChar32 char32At(int32_t offset) const;
1403
1404 /**
1405 * Adjust a random-access offset so that
1406 * it points to the beginning of a Unicode character.
1407 * The offset that is passed in points to
1408 * any code unit of a code point,
1409 * while the returned offset will point to the first code unit
1410 * of the same code point.
1411 * In UTF-16, if the input offset points to the second surrogate
1412 * of a surrogate pair, then the returned offset will point
1413 * to the first surrogate of that pair.
1414 * @param offset a valid offset into one code point of the text
1415 * @return the offset of the first code unit of the same code point
1416 * @see U16_SET_CP_START
1417 * @stable ICU 2.0
1418 */
1419 int32_t getChar32Start(int32_t offset) const;
1420
1421 /**
1422 * Adjust a random-access offset so that
1423 * it points behind a Unicode character.
1424 * The offset that is passed in points behind
1425 * any code unit of a code point,
1426 * while the returned offset will point behind the last code unit
1427 * of the same code point.
1428 * In UTF-16, if the input offset points behind the first surrogate
1429 * (i.e., to the second surrogate)
1430 * of a surrogate pair, then the returned offset will point
1431 * behind the second surrogate (i.e., to the first code unit after the pair).
1432 * @param offset a valid offset after any code unit of a code point of the text
1433 * @return offset of the first code unit after the same code point
1434 * @see U16_SET_CP_LIMIT
1435 * @stable ICU 2.0
1436 */
1437 int32_t getChar32Limit(int32_t offset) const;
1438
1439 /**
1440 * Move the code unit index along the string by delta code points.
1441 * Interpret the input index as a code unit-based offset into the string,
1442 * move the index forward or backward by delta code points, and
1443 * return the resulting index.
1444 * The input index should point to the first code unit of a code point,
1445 * if there is more than one.
1446 *
1447 * Both input and output indexes are code unit-based as for all
1448 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1449 * If delta<0 then the index is moved backward (toward the start of the string).
1450 * If delta>0 then the index is moved forward (toward the end of the string).
1451 *
1452 * This behaves like CharacterIterator::move32(delta, kCurrent).
1453 *
1454 * Behavior for out-of-bounds indexes:
1455 * `moveIndex32` pins the input index to 0..length(), i.e.,
1456 * if the input index<0 then it is pinned to 0;
1457 * if the input index>length() then it is pinned to length().
1458 * Afterwards, the index is moved by `delta` code points
1459 * forward or backward,
1460 * but no further backward than to 0 and no further forward than to length().
1461 * The resulting index return value will be between 0 and length(), inclusive.
1462 *
1463 * Examples:
1464 * \code
1465 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1466 * UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
1467 *
1468 * // initial index: position of U+10000
1469 * int32_t index=1;
1470 *
1471 * // the following examples will all result in index==4, position of U+10ffff
1472 *
1473 * // skip 2 code points from some position in the string
1474 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1475 *
1476 * // go to the 3rd code point from the start of s (0-based)
1477 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1478 *
1479 * // go to the next-to-last code point of s
1480 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1481 * \endcode
1482 *
1483 * @param index input code unit index
1484 * @param delta (signed) code point count to move the index forward or backward
1485 * in the string
1486 * @return the resulting code unit index
1487 * @stable ICU 2.0
1488 */
1489 int32_t moveIndex32(int32_t index, int32_t delta) const;
1490
1491 /* Substring extraction */
1492
1493 /**
1494 * Copy the characters in the range
1495 * [`start`, `start + length`) into the array `dst`,
1496 * beginning at `dstStart`.
1497 * If the string aliases to `dst` itself as an external buffer,
1498 * then extract() will not copy the contents.
1499 *
1500 * @param start offset of first character which will be copied into the array
1501 * @param length the number of characters to extract
1502 * @param dst array in which to copy characters. The length of `dst`
1503 * must be at least (`dstStart + length`).
1504 * @param dstStart the offset in `dst` at which the first extracted character
1505 * will be stored; defaults to 0
1506 * @stable ICU 2.0
1507 */
1508 inline void extract(int32_t start,
1509 int32_t length,
1510 Char16Ptr dst,
1511 int32_t dstStart = 0) const;
1512
1513 /**
1514 * Copy the contents of the string into dest.
1515 * This is a convenience function that
1516 * checks if there is enough space in dest,
1517 * extracts the entire string if possible,
1518 * and NUL-terminates dest if possible.
1519 *
1520 * If the string fits into dest but cannot be NUL-terminated
1521 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1522 * If the string itself does not fit into dest
1523 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1524 *
1525 * If the string aliases to `dest` itself as an external buffer,
1526 * then extract() will not copy the contents.
1527 *
1528 * @param dest Destination string buffer.
1529 * @param destCapacity Number of char16_t code units available at dest.
1530 * @param errorCode ICU error code.
1531 * @return length()
1532 * @stable ICU 2.0
1533 */
1534 int32_t
1535 extract(Char16Ptr dest, int32_t destCapacity,
1536 UErrorCode &errorCode) const;
1537
1538 /**
1539 * Copy the characters in the range
1540 * [`start`, `start + length`) into the UnicodeString
1541 * `target`.
1542 * @param start offset of the first character which will be copied
1543 * @param length the number of characters to extract
1544 * @param target UnicodeString into which to copy the characters.
1545 * @stable ICU 2.0
1546 */
1547 inline void extract(int32_t start,
1548 int32_t length,
1549 UnicodeString& target) const;
1550
1551 /**
1552 * Copy the characters in the range [`start`, `limit`)
1553 * into the array `dst`, beginning at `dstStart`.
1554 * @param start offset of first character which will be copied into the array
1555 * @param limit offset immediately following the last character to be copied
1556 * @param dst array in which to copy characters. The length of `dst`
1557 * must be at least (`dstStart + (limit - start)`).
1558 * @param dstStart the offset in `dst` at which the first extracted character
1559 * will be stored; defaults to 0
1560 * @stable ICU 2.0
1561 */
1562 inline void extractBetween(int32_t start,
1563 int32_t limit,
1564 char16_t *dst,
1565 int32_t dstStart = 0) const;
1566
1567 /**
1568 * Copy the characters in the range [`start`, `limit`)
1569 * into the UnicodeString `target`. Part of the Replaceable API (this is a virtual override).
1570 * @param start offset of the first character which will be copied
1571 * @param limit offset immediately following the last character to be copied
1572 * @param target UnicodeString into which to copy the characters.
1573 * @stable ICU 2.0
1574 */
1575 virtual void extractBetween(int32_t start,
1576 int32_t limit,
1577 UnicodeString& target) const override;
1578
1579 /**
1580 * Copy the characters in the range
1581 * [`start`, `start + startLength`) into an array of characters.
1582 * All characters must be invariant (see utypes.h).
1583 * Use US_INV as the last, signature-distinguishing parameter.
1584 *
1585 * This function does not write any more than `targetCapacity`
1586 * characters but returns the length of the entire output string
1587 * so that one can allocate a larger buffer and call the function again
1588 * if necessary.
1589 * The output string is NUL-terminated if possible.
1590 *
1591 * @param start offset of first character which will be copied
1592 * @param startLength the number of characters to extract
1593 * @param target the target buffer for extraction, can be nullptr
1594 * if targetCapacity is 0
1595 * @param targetCapacity the length of the target buffer
1596 * @param inv Signature-distinguishing parameter, use US_INV.
1597 * @return the output string length, not including the terminating NUL
1598 * @stable ICU 3.2
1599 */
1600 int32_t extract(int32_t start,
1601 int32_t startLength,
1602 char *target,
1603 int32_t targetCapacity,
1604 enum EInvariant inv) const;
1605
1606 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1607
1608 /**
1609 * Copy the characters in the range
1610 * [`start`, `start + startLength`) into an array of characters
1611 * in the platform's default codepage.
1612 * This function does not write any more than `targetLength`
1613 * characters but returns the length of the entire output string
1614 * so that one can allocate a larger buffer and call the function again
1615 * if necessary.
1616 * The output string is NUL-terminated if possible.
1617 *
1618 * @param start offset of first character which will be copied
1619 * @param startLength the number of characters to extract
1620 * @param target the target buffer for extraction
1621 * @param targetLength the length of the target buffer
1622 * If `target` is nullptr, then the number of bytes required for
1623 * `target` is returned.
1624 * @return the output string length, not including the terminating NUL
1625 * @stable ICU 2.0
1626 */
1627 int32_t extract(int32_t start,
1628 int32_t startLength,
1629 char *target,
1630 uint32_t targetLength) const;
1631
1632 #endif
1633
1634 #if !UCONFIG_NO_CONVERSION
1635
1636 /**
1637 * Copy the characters in the range
1638 * [`start`, `start + startLength`) into an array of characters
1639 * in a specified codepage.
1640 * The output string is NUL-terminated.
1641 *
1642 * Recommendation: For invariant-character strings use
1643 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1644 * because it avoids object code dependencies of UnicodeString on
1645 * the conversion code.
1646 *
1647 * @param start offset of first character which will be copied
1648 * @param startLength the number of characters to extract
1649 * @param target the target buffer for extraction
1650 * @param codepage the desired codepage for the characters. 0 has
1651 * the special meaning of the default codepage
1652 * If `codepage` is an empty string (`""`),
1653 * then a simple conversion is performed on the codepage-invariant
1654 * subset ("invariant characters") of the platform encoding. See utypes.h.
1655 * If `target` is nullptr, then the number of bytes required for
1656 * `target` is returned. It is assumed that the target is big enough
1657 * to fit all of the characters.
1658 * @return the output string length, not including the terminating NUL
1659 * @stable ICU 2.0
1660 */
1661 inline int32_t extract(int32_t start,
1662 int32_t startLength,
1663 char* target,
1664 const char* codepage = nullptr) const;
1665
1666 /**
1667 * Copy the characters in the range
1668 * [`start`, `start + startLength`) into an array of characters
1669 * in a specified codepage.
1670 * This function does not write any more than `targetLength`
1671 * characters but returns the length of the entire output string
1672 * so that one can allocate a larger buffer and call the function again
1673 * if necessary.
1674 * The output string is NUL-terminated if possible.
1675 *
1676 * Recommendation: For invariant-character strings use
1677 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1678 * because it avoids object code dependencies of UnicodeString on
1679 * the conversion code.
1680 *
1681 * @param start offset of first character which will be copied
1682 * @param startLength the number of characters to extract
1683 * @param target the target buffer for extraction
1684 * @param targetLength the length of the target buffer
1685 * @param codepage the desired codepage for the characters. 0 has
1686 * the special meaning of the default codepage
1687 * If `codepage` is an empty string (`""`),
1688 * then a simple conversion is performed on the codepage-invariant
1689 * subset ("invariant characters") of the platform encoding. See utypes.h.
1690 * If `target` is nullptr, then the number of bytes required for
1691 * `target` is returned.
1692 * @return the output string length, not including the terminating NUL
1693 * @stable ICU 2.0
1694 */
1695 int32_t extract(int32_t start,
1696 int32_t startLength,
1697 char *target,
1698 uint32_t targetLength,
1699 const char *codepage) const;
1700
1701 /**
1702 * Convert the UnicodeString into a codepage string using an existing UConverter.
1703 * The output string is NUL-terminated if possible.
1704 *
1705 * This function avoids the overhead of opening and closing a converter if
1706 * multiple strings are extracted.
1707 *
1708 * @param dest destination string buffer, can be nullptr if destCapacity==0
1709 * @param destCapacity the number of chars available at dest
1710 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1711 * or nullptr for the default converter
1712 * @param errorCode normal ICU error code
1713 * @return the length of the output string, not counting the terminating NUL;
1714 * if the length is greater than destCapacity, then the string will not fit
1715 * and a buffer of the indicated length would need to be passed in
1716 * @stable ICU 2.0
1717 */
1718 int32_t extract(char *dest, int32_t destCapacity,
1719 UConverter *cnv,
1720 UErrorCode &errorCode) const;
1721
1722 #endif
1723
1724 /**
1725 * Create a temporary substring for the specified range.
1726 * Unlike the substring constructor and setTo() functions,
1727 * the object returned here will be a read-only alias (using getBuffer())
1728 * rather than copying the text.
1729 * As a result, this substring operation is much faster but requires
1730 * that the original string not be modified or deleted during the lifetime
1731 * of the returned substring object.
1732 * @param start offset of the first character visible in the substring
1733 * @param length length of the substring
1734 * @return a read-only alias UnicodeString object for the substring
1735 * @stable ICU 4.4
1736 */
1737 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1738
1739 /**
1740 * Create a temporary substring for the specified range.
1741 * Same as tempSubString(start, length) except that the substring range
1742 * is specified as a (start, limit) pair (with an exclusive limit index)
1743 * rather than a (start, length) pair.
1744 * @param start offset of the first character visible in the substring
1745 * @param limit offset immediately following the last character visible in the substring
1746 * @return a read-only alias UnicodeString object for the substring
1747 * @stable ICU 4.4
1748 */
1749 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1750
1751 /**
1752 * Convert the UnicodeString to UTF-8 and write the result
1753 * to a ByteSink. This is called by toUTF8String().
1754 * Unpaired surrogates are replaced with U+FFFD.
1755 * Calls u_strToUTF8WithSub().
1756 *
1757 * @param sink A ByteSink to which the UTF-8 version of the string is written.
1758 * sink.Flush() is called at the end.
1759 * @stable ICU 4.2
1760 * @see toUTF8String
1761 */
1762 void toUTF8(ByteSink &sink) const;
1763
1764 /**
1765 * Convert the UnicodeString to UTF-8 and append the result
1766 * to a standard string.
1767 * Unpaired surrogates are replaced with U+FFFD.
1768 * Calls toUTF8().
1769 *
1770 * @param result A standard string (or a compatible object)
1771 * to which the UTF-8 version of the string is appended.
1772 * @return The string object.
1773 * @stable ICU 4.2
1774 * @see toUTF8
1775 */
1776 template<typename StringClass>
1777 StringClass &toUTF8String(StringClass &result) const {
1778 StringByteSink<StringClass> sbs(&result, length()); // sink that appends to result; length() is presumably a capacity hint (see StringByteSink) - TODO confirm
1779 toUTF8(sbs); // converts this string to UTF-8 and writes it into the sink
1780 return result; // the same object that was passed in, so calls can be chained
1781 }
1782
1783 /**
1784 * Convert the UnicodeString to UTF-32.
1785 * Unpaired surrogates are replaced with U+FFFD.
1786 * Calls u_strToUTF32WithSub().
1787 *
1788 * @param utf32 destination string buffer, can be nullptr if capacity==0
1789 * @param capacity the number of UChar32s available at utf32
1790 * @param errorCode Standard ICU error code. Its input value must
1791 * pass the U_SUCCESS() test, or else the function returns
1792 * immediately. Check for U_FAILURE() on output or use with
1793 * function chaining. (See User Guide for details.)
1794 * @return The length of the UTF-32 string.
1795 * @see fromUTF32
1796 * @stable ICU 4.2
1797 */
1798 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1799
1800 /* Length operations */
1801
1802 /**
1803 * Return the length of the UnicodeString object.
1804 * The length is the number of char16_t code units in the UnicodeString.
1805 * If you want the number of code points, please use countChar32().
1806 * @return the length of the UnicodeString object
1807 * @see countChar32
1808 * @stable ICU 2.0
1809 */
1810 inline int32_t length() const;
1811
1812 /**
1813 * Count Unicode code points in the length char16_t code units of the string.
1814 * A code point may occupy either one or two char16_t code units.
1815 * Counting code points involves reading all code units.
1816 *
1817 * This function is basically the inverse of moveIndex32().
1818 *
1819 * @param start the index of the first code unit to check
1820 * @param length the number of char16_t code units to check
1821 * @return the number of code points in the specified code units
1822 * @see length
1823 * @stable ICU 2.0
1824 */
1825 int32_t
1826 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1827
1828 /**
1829 * Check if the length char16_t code units of the string
1830 * contain more Unicode code points than a certain number.
1831 * This is more efficient than counting all code points in this part of the string
1832 * and comparing that number with a threshold.
1833 * This function may not need to scan the string at all if the length
1834 * falls within a certain range, and
1835 * never needs to count more than 'number+1' code points.
1836 * Logically equivalent to (countChar32(start, length)>number).
1837 * A Unicode code point may occupy either one or two char16_t code units.
1838 *
1839 * @param start the index of the first code unit to check (0 for the entire string)
1840 * @param length the number of char16_t code units to check
1841 * (use INT32_MAX for the entire string; remember that start/length
1842 * values are pinned)
1843 * @param number The number of code points in the (sub)string is compared against
1844 * the 'number' parameter.
1845 * @return Boolean value for whether the string contains more Unicode code points
1846 * than 'number'. Same as (u_countChar32(s, length)>number).
1847 * @see countChar32
1848 * @see u_strHasMoreChar32Than
1849 * @stable ICU 2.4
1850 */
1851 UBool
1852 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1853
1854 /**
1855 * Determine if this string is empty.
1856 * @return true if this string contains 0 characters, false otherwise.
1857 * @stable ICU 2.0
1858 */
1859 inline UBool isEmpty() const;
1860
1861 /**
1862 * Return the capacity of the internal buffer of the UnicodeString object.
1863 * This is useful together with the getBuffer functions.
1864 * See there for details.
1865 *
1866 * @return the number of char16_ts available in the internal buffer
1867 * @see getBuffer
1868 * @stable ICU 2.0
1869 */
1870 inline int32_t getCapacity() const;
1871
1872 /* Other operations */
1873
1874 /**
1875 * Generate a hash code for this object.
1876 * @return The hash code of this UnicodeString.
1877 * @stable ICU 2.0
1878 */
1879 inline int32_t hashCode() const;
1880
1881 /**
1882 * Determine if this object contains a valid string.
1883 * A bogus string has no value. It is different from an empty string,
1884 * although in both cases isEmpty() returns true and length() returns 0.
1885 * setToBogus() and isBogus() can be used to indicate that no string value is available.
1886 * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
1887 * length() returns 0.
1888 *
1889 * @return true if the string is bogus/invalid, false otherwise
1890 * @see setToBogus()
1891 * @stable ICU 2.0
1892 */
1893 inline UBool isBogus() const;
1894
1895 //========================================
1896 // Write operations
1897 //========================================
1898
1899 /* Assignment operations */
1900
1901 /**
1902 * Assignment operator. Replace the characters in this UnicodeString
1903 * with the characters from `srcText`.
1904 *
1905 * Starting with ICU 2.4, the assignment operator and the copy constructor
1906 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1907 * By contrast, the fastCopyFrom() function implements the old,
1908 * more efficient but less safe behavior
1909 * of making this string also a readonly alias to the same buffer.
1910 *
1911 * If the source object has an "open" buffer from getBuffer(minCapacity),
1912 * then the copy is an empty string.
1913 *
1914 * @param srcText The text containing the characters to replace
1915 * @return a reference to this
1916 * @stable ICU 2.0
1917 * @see fastCopyFrom
1918 */
1919 UnicodeString &operator=(const UnicodeString &srcText);
1920
1921 /**
1922 * Almost the same as the assignment operator.
1923 * Replace the characters in this UnicodeString
1924 * with the characters from `srcText`.
1925 *
1926 * This function works the same as the assignment operator
1927 * for all strings except for ones that are readonly aliases.
1928 *
1929 * Starting with ICU 2.4, the assignment operator and the copy constructor
1930 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1931 * This function implements the old, more efficient but less safe behavior
1932 * of making this string also a readonly alias to the same buffer.
1933 *
1934 * The fastCopyFrom function must be used only if it is known that the lifetime of
1935 * this UnicodeString does not exceed the lifetime of the aliased buffer
1936 * including its contents, for example for strings from resource bundles
1937 * or aliases to string constants.
1938 *
1939 * If the source object has an "open" buffer from getBuffer(minCapacity),
1940 * then the copy is an empty string.
1941 *
1942 * @param src The text containing the characters to replace.
1943 * @return a reference to this
1944 * @stable ICU 2.4
1945 */
1946 UnicodeString &fastCopyFrom(const UnicodeString &src);
1947
1948 #ifndef U_HIDE_DRAFT_API
1949 /**
1950 * Assignment operator. Replaces the characters in this UnicodeString
1951 * with a copy of the characters from the `src`
1952 * which is, or which is implicitly convertible to,
1953 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
1954 *
1955 * @param src The string view containing the characters to copy.
1956 * @return a reference to this
1957 * @draft ICU 76
1958 */
1959 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
1960 inline UnicodeString &operator=(const S &src) {
1961 unBogus(); // presumably clears the bogus flag first, as assignment is documented to do (see setToBogus())
1962 return doReplace(0, length(), internal::toU16StringView(src)); // replace the entire current contents [0, length()) with the characters of src
1963 }
1964 #endif // U_HIDE_DRAFT_API
1965
1966 /**
1967 * Move assignment operator; might leave src in bogus state.
1968 * This string will have the same contents and state that the source string had.
1969 * The behavior is undefined if *this and src are the same object.
1970 * @param src source string
1971 * @return *this
1972 * @stable ICU 56
1973 */
1974 UnicodeString &operator=(UnicodeString &&src) noexcept;
1975
1976 /**
1977 * Swap strings.
1978 * @param other other string
1979 * @stable ICU 56
1980 */
1981 void swap(UnicodeString &other) noexcept;
1982
1983 /**
1984 * Non-member UnicodeString swap function.
1985 * @param s1 will get s2's contents and state
1986 * @param s2 will get s1's contents and state
1987 * @stable ICU 56
1988 */
1989 friend inline void U_EXPORT2
1990 swap(UnicodeString &s1, UnicodeString &s2) noexcept {
1991 s1.swap(s2); // delegate to the member swap()
1992 }
1993
1994 /**
1995 * Assignment operator. Replace the characters in this UnicodeString
1996 * with the code unit `ch`.
1997 * @param ch the code unit to replace
1998 * @return a reference to this
1999 * @stable ICU 2.0
2000 */
2001 inline UnicodeString& operator= (char16_t ch);
2002
2003 /**
2004 * Assignment operator. Replace the characters in this UnicodeString
2005 * with the code point `ch`.
2006 * @param ch the code point to replace
2007 * @return a reference to this
2008 * @stable ICU 2.0
2009 */
2010 inline UnicodeString& operator= (UChar32 ch);
2011
2012 /**
2013 * Set the text in the UnicodeString object to the characters
2014 * in `srcText` in the range
2015 * [`srcStart`, `srcText.length()`).
2016 * `srcText` is not modified.
2017 * @param srcText the source for the new characters
2018 * @param srcStart the offset into `srcText` where new characters
2019 * will be obtained
2020 * @return a reference to this
2021 * @stable ICU 2.2
2022 */
2023 inline UnicodeString& setTo(const UnicodeString& srcText,
2024 int32_t srcStart);
2025
2026 /**
2027 * Set the text in the UnicodeString object to the characters
2028 * in `srcText` in the range
2029 * [`srcStart`, `srcStart + srcLength`).
2030 * `srcText` is not modified.
2031 * @param srcText the source for the new characters
2032 * @param srcStart the offset into `srcText` where new characters
2033 * will be obtained
2034 * @param srcLength the number of characters in `srcText` in the
2035 * replace string.
2036 * @return a reference to this
2037 * @stable ICU 2.0
2038 */
2039 inline UnicodeString& setTo(const UnicodeString& srcText,
2040 int32_t srcStart,
2041 int32_t srcLength);
2042
2043 /**
2044 * Set the text in the UnicodeString object to the characters in
2045 * `srcText`.
2046 * `srcText` is not modified.
2047 * @param srcText the source for the new characters
2048 * @return a reference to this
2049 * @stable ICU 2.0
2050 */
2051 inline UnicodeString& setTo(const UnicodeString& srcText);
2052
2053 /**
2054 * Set the characters in the UnicodeString object to the characters
2055 * in `srcChars`. `srcChars` is not modified.
2056 * @param srcChars the source for the new characters
2057 * @param srcLength the number of Unicode characters in srcChars.
2058 * @return a reference to this
2059 * @stable ICU 2.0
2060 */
2061 inline UnicodeString& setTo(const char16_t *srcChars,
2062 int32_t srcLength);
2063
2064 /**
2065 * Set the characters in the UnicodeString object to the code unit
2066 * `srcChar`.
2067 * @param srcChar the code unit which becomes the UnicodeString's character
2068 * content
2069 * @return a reference to this
2070 * @stable ICU 2.0
2071 */
2072 inline UnicodeString& setTo(char16_t srcChar);
2073
2074 /**
2075 * Set the characters in the UnicodeString object to the code point
2076 * `srcChar`.
2077 * @param srcChar the code point which becomes the UnicodeString's character
2078 * content
2079 * @return a reference to this
2080 * @stable ICU 2.0
2081 */
2082 inline UnicodeString& setTo(UChar32 srcChar);
2083
2084 /**
2085 * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2086 * The text will be used for the UnicodeString object, but
2087 * it will not be released when the UnicodeString is destroyed.
2088 * This has copy-on-write semantics:
2089 * When the string is modified, then the buffer is first copied into
2090 * newly allocated memory.
2091 * The aliased buffer is never modified.
2092 *
2093 * In an assignment to another UnicodeString, when using the copy constructor
2094 * or the assignment operator, the text will be copied.
2095 * When using fastCopyFrom(), the text will be aliased again,
2096 * so that both strings then alias the same readonly-text.
2097 *
2098 * @param isTerminated specifies if `text` is `NUL`-terminated.
2099 * This must be true if `textLength==-1`.
2100 * @param text The characters to alias for the UnicodeString.
2101 * @param textLength The number of Unicode characters in `text` to alias.
2102 * If -1, then this function will determine the length
2103 * by calling `u_strlen()`.
2104 * @return a reference to this
2105 * @stable ICU 2.0
2106 */
2107 UnicodeString &setTo(UBool isTerminated,
2108 ConstChar16Ptr text,
2109 int32_t textLength);
2110
2111 /**
2112 * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2113 * The text will be used for the UnicodeString object, but
2114 * it will not be released when the UnicodeString is destroyed.
2115 * This has write-through semantics:
2116 * For as long as the capacity of the buffer is sufficient, write operations
2117 * will directly affect the buffer. When more capacity is necessary, then
2118 * a new buffer will be allocated and the contents copied as with regularly
2119 * constructed strings.
2120 * In an assignment to another UnicodeString, the buffer will be copied.
2121 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2122 * as the string buffer itself and will in this case not copy the contents.
2123 *
2124 * @param buffer The characters to alias for the UnicodeString.
2125 * @param buffLength The number of Unicode characters in `buffer` to alias.
2126 * @param buffCapacity The size of `buffer` in char16_ts.
2127 * @return a reference to this
2128 * @stable ICU 2.0
2129 */
2130 UnicodeString &setTo(char16_t *buffer,
2131 int32_t buffLength,
2132 int32_t buffCapacity);
2133
2134 /**
2135 * Make this UnicodeString object invalid.
2136 * The string will test true with isBogus().
2137 *
2138 * A bogus string has no value. It is different from an empty string.
2139 * It can be used to indicate that no string value is available.
2140 * getBuffer() and getTerminatedBuffer() return nullptr, and
2141 * length() returns 0.
2142 *
2143 * This utility function is used throughout the UnicodeString
2144 * implementation to indicate that a UnicodeString operation failed,
2145 * and may be used in other functions,
2146 * especially but not exclusively when such functions do not
2147 * take a UErrorCode for simplicity.
2148 *
2149 * The following methods, and no others, will clear a string object's bogus flag:
2150 * - remove()
2151 * - remove(0, INT32_MAX)
2152 * - truncate(0)
2153 * - operator=() (assignment operator)
2154 * - setTo(...)
2155 *
2156 * The simplest way to turn a bogus string into an empty one
2157 * is to use the remove() function.
2158 * Examples for other functions that are equivalent to "set to empty string":
2159 * \code
2160 * if(s.isBogus()) {
2161 * s.remove(); // set to an empty string (remove all), or
2162 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2163 * s.truncate(0); // set to an empty string (complete truncation), or
2164 * s=UnicodeString(); // assign an empty string, or
2165 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2166 * s.setTo(u"", 0); // set to an empty C Unicode string
2167 * }
2168 * \endcode
2169 *
2170 * @see isBogus()
2171 * @stable ICU 2.0
2172 */
2173 void setToBogus();
2174
2175 /**
2176 * Set the character at the specified offset to the specified character.
2177 * @param offset A valid offset into the text of the character to set
2178 * @param ch The new character
2179 * @return A reference to this
2180 * @stable ICU 2.0
2181 */
2182 UnicodeString& setCharAt(int32_t offset,
2183 char16_t ch);
2184
2185
2186 /* Append operations */
2187
2188 /**
2189 * Append operator. Append the code unit `ch` to the UnicodeString
2190 * object.
2191 * @param ch the code unit to be appended
2192 * @return a reference to this
2193 * @stable ICU 2.0
2194 */
2195 inline UnicodeString& operator+= (char16_t ch);
2196
2197 /**
2198 * Append operator. Append the code point `ch` to the UnicodeString
2199 * object.
2200 * @param ch the code point to be appended
2201 * @return a reference to this
2202 * @stable ICU 2.0
2203 */
2204 inline UnicodeString& operator+= (UChar32 ch);
2205
2206 /**
2207 * Append operator. Append the characters in `srcText` to the
2208 * UnicodeString object. `srcText` is not modified.
2209 * @param srcText the source for the new characters
2210 * @return a reference to this
2211 * @stable ICU 2.0
2212 */
2213 inline UnicodeString& operator+= (const UnicodeString& srcText);
2214
2215 #ifndef U_HIDE_DRAFT_API
2216 /**
2217 * Append operator. Appends the characters in `src`
2218 * which is, or which is implicitly convertible to,
2219 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
2220 * to the UnicodeString object.
2221 *
2222 * @param src the source for the new characters
2223 * @return a reference to this
2224 * @draft ICU 76
2225 */
2226 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2227 inline UnicodeString& operator+=(const S &src) {
2228 return doAppend(internal::toU16StringView(src)); // same call as the string-view append(const S &) overload
2229 }
2230 #endif // U_HIDE_DRAFT_API
2231
2232 /**
2233 * Append the characters
2234 * in `srcText` in the range
2235 * [`srcStart`, `srcStart + srcLength`) to the
2236 * UnicodeString object. `srcText`
2237 * is not modified.
2238 * @param srcText the source for the new characters
2239 * @param srcStart the offset into `srcText` where new characters
2240 * will be obtained
2241 * @param srcLength the number of characters in `srcText` in
2242 * the append string
2243 * @return a reference to this
2244 * @stable ICU 2.0
2245 */
2246 inline UnicodeString& append(const UnicodeString& srcText,
2247 int32_t srcStart,
2248 int32_t srcLength);
2249
2250 /**
2251 * Append the characters in `srcText` to the UnicodeString object.
2252 * `srcText` is not modified.
2253 * @param srcText the source for the new characters
2254 * @return a reference to this
2255 * @stable ICU 2.0
2256 */
2257 inline UnicodeString& append(const UnicodeString& srcText);
2258
2259 /**
2260 * Append the characters in `srcChars` in the range
2261 * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
2262 * object.
2263 * `srcChars` is not modified.
2264 * @param srcChars the source for the new characters
2265 * @param srcStart the offset into `srcChars` where new characters
2266 * will be obtained
2267 * @param srcLength the number of characters in `srcChars` in
2268 * the append string; can be -1 if `srcChars` is NUL-terminated
2269 * @return a reference to this
2270 * @stable ICU 2.0
2271 */
2272 inline UnicodeString& append(const char16_t *srcChars,
2273 int32_t srcStart,
2274 int32_t srcLength);
2275
2276 /**
2277 * Append the characters in `srcChars` to the UnicodeString object.
2278 * `srcChars` is not modified.
2279 * @param srcChars the source for the new characters
2280 * @param srcLength the number of Unicode characters in `srcChars`;
2281 * can be -1 if `srcChars` is NUL-terminated
2282 * @return a reference to this
2283 * @stable ICU 2.0
2284 */
2285 inline UnicodeString& append(ConstChar16Ptr srcChars,
2286 int32_t srcLength);
2287
2288 #ifndef U_HIDE_DRAFT_API
2289 /**
2290 * Appends the characters in `src`
2291 * which is, or which is implicitly convertible to,
2292 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
2293 * to the UnicodeString object.
2294 *
2295 * @param src the source for the new characters
2296 * @return a reference to this
2297 * @draft ICU 76
2298 */
2299 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2300 inline UnicodeString& append(const S &src) {
2301 return doAppend(internal::toU16StringView(src)); // convert src via internal::toU16StringView(), then append the result to this string
2302 }
2303 #endif // U_HIDE_DRAFT_API
2304
2305 /**
2306 * Append the code unit `srcChar` to the UnicodeString object.
2307 * @param srcChar the code unit to append
2308 * @return a reference to this
2309 * @stable ICU 2.0
2310 */
2311 inline UnicodeString& append(char16_t srcChar);
2312
2313 /**
2314 * Append the code point `srcChar` to the UnicodeString object.
2315 * @param srcChar the code point to append
2316 * @return a reference to this
2317 * @stable ICU 2.0
2318 */
2319 UnicodeString& append(UChar32 srcChar);
2320
2321
2322 /* Insert operations */
2323
2324 /**
2325 * Insert the characters in `srcText` in the range
2326 * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2327 * object at offset `start`. `srcText` is not modified.
2328 * @param start the offset where the insertion begins
2329 * @param srcText the source for the new characters
2330 * @param srcStart the offset into `srcText` where new characters
2331 * will be obtained
2332 * @param srcLength the number of characters in `srcText` in
2333 * the insert string
2334 * @return a reference to this
2335 * @stable ICU 2.0
2336 */
2337 inline UnicodeString& insert(int32_t start,
2338 const UnicodeString& srcText,
2339 int32_t srcStart,
2340 int32_t srcLength);
2341
2342 /**
2343 * Insert the characters in `srcText` into the UnicodeString object
2344 * at offset `start`. `srcText` is not modified.
2345 * @param start the offset where the insertion begins
2346 * @param srcText the source for the new characters
2347 * @return a reference to this
2348 * @stable ICU 2.0
2349 */
2350 inline UnicodeString& insert(int32_t start,
2351 const UnicodeString& srcText);
2352
2353 /**
2354 * Insert the characters in `srcChars` in the range
2355 * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2356 * object at offset `start`. `srcChars` is not modified.
2357 * @param start the offset at which the insertion begins
2358 * @param srcChars the source for the new characters
2359 * @param srcStart the offset into `srcChars` where new characters
2360 * will be obtained
2361 * @param srcLength the number of characters in `srcChars`
2362 * in the insert string
2363 * @return a reference to this
2364 * @stable ICU 2.0
2365 */
2366 inline UnicodeString& insert(int32_t start,
2367 const char16_t *srcChars,
2368 int32_t srcStart,
2369 int32_t srcLength);
2370
2371 /**
2372 * Insert the characters in `srcChars` into the UnicodeString object
2373 * at offset `start`. `srcChars` is not modified.
2374 * @param start the offset where the insertion begins
2375 * @param srcChars the source for the new characters
2376 * @param srcLength the number of Unicode characters in srcChars.
2377 * @return a reference to this
2378 * @stable ICU 2.0
2379 */
2380 inline UnicodeString& insert(int32_t start,
2381 ConstChar16Ptr srcChars,
2382 int32_t srcLength);
2383
2384 /**
2385 * Insert the code unit `srcChar` into the UnicodeString object at
2386 * offset `start`.
2387 * @param start the offset at which the insertion occurs
2388 * @param srcChar the code unit to insert
2389 * @return a reference to this
2390 * @stable ICU 2.0
2391 */
2392 inline UnicodeString& insert(int32_t start,
2393 char16_t srcChar);
2394
2395 /**
2396 * Insert the code point `srcChar` into the UnicodeString object at
2397 * offset `start`.
2398 * @param start the offset at which the insertion occurs
2399 * @param srcChar the code point to insert
2400 * @return a reference to this
2401 * @stable ICU 2.0
2402 */
2403 inline UnicodeString& insert(int32_t start,
2404 UChar32 srcChar);
2405
2406
2407 /* Replace operations */
2408
2409 /**
2410 * Replace the characters in the range
2411 * [`start`, `start + length`) with the characters in
2412 * `srcText` in the range
2413 * [`srcStart`, `srcStart + srcLength`).
2414 * `srcText` is not modified.
2415 * @param start the offset at which the replace operation begins
2416 * @param length the number of characters to replace. The character at
2417 * `start + length` is not modified.
2418 * @param srcText the source for the new characters
2419 * @param srcStart the offset into `srcText` where new characters
2420 * will be obtained
2421 * @param srcLength the number of characters in `srcText` in
2422 * the replace string
2423 * @return a reference to this
2424 * @stable ICU 2.0
2425 */
2426 inline UnicodeString& replace(int32_t start,
2427 int32_t length,
2428 const UnicodeString& srcText,
2429 int32_t srcStart,
2430 int32_t srcLength);
2431
2432 /**
2433 * Replace the characters in the range
2434 * [`start`, `start + length`)
2435 * with the characters in `srcText`. `srcText` is
2436 * not modified.
2437 * @param start the offset at which the replace operation begins
2438 * @param length the number of characters to replace. The character at
2439 * `start + length` is not modified.
2440 * @param srcText the source for the new characters
2441 * @return a reference to this
2442 * @stable ICU 2.0
2443 */
2444 inline UnicodeString& replace(int32_t start,
2445 int32_t length,
2446 const UnicodeString& srcText);
2447
2448 /**
2449 * Replace the characters in the range
2450 * [`start`, `start + length`) with the characters in
2451 * `srcChars` in the range
2452 * [`srcStart`, `srcStart + srcLength`). `srcChars`
2453 * is not modified.
2454 * @param start the offset at which the replace operation begins
2455 * @param length the number of characters to replace. The character at
2456 * `start + length` is not modified.
2457 * @param srcChars the source for the new characters
2458 * @param srcStart the offset into `srcChars` where new characters
2459 * will be obtained
2460 * @param srcLength the number of characters in `srcChars`
2461 * in the replace string
2462 * @return a reference to this
2463 * @stable ICU 2.0
2464 */
2465 inline UnicodeString& replace(int32_t start,
2466 int32_t length,
2467 const char16_t *srcChars,
2468 int32_t srcStart,
2469 int32_t srcLength);
2470
2471 /**
2472 * Replace the characters in the range
2473 * [`start`, `start + length`) with the characters in
2474 * `srcChars`. `srcChars` is not modified.
2475 * @param start the offset at which the replace operation begins
2476 * @param length number of characters to replace. The character at
2477 * `start + length` is not modified.
2478 * @param srcChars the source for the new characters
2479 * @param srcLength the number of Unicode characters in srcChars
2480 * @return a reference to this
2481 * @stable ICU 2.0
2482 */
2483 inline UnicodeString& replace(int32_t start,
2484 int32_t length,
2485 ConstChar16Ptr srcChars,
2486 int32_t srcLength);
2487
2488 /**
2489 * Replace the characters in the range
2490 * [`start`, `start + length`) with the code unit
2491 * `srcChar`.
2492 * @param start the offset at which the replace operation begins
2493 * @param length the number of characters to replace. The character at
2494 * `start + length` is not modified.
2495 * @param srcChar the new code unit
2496 * @return a reference to this
2497 * @stable ICU 2.0
2498 */
2499 inline UnicodeString& replace(int32_t start,
2500 int32_t length,
2501 char16_t srcChar);
2502
2503 /**
2504 * Replace the characters in the range
2505 * [`start`, `start + length`) with the code point
2506 * `srcChar`.
2507 * @param start the offset at which the replace operation begins
2508 * @param length the number of characters to replace. The character at
2509 * `start + length` is not modified.
2510 * @param srcChar the new code point
2511 * @return a reference to this
2512 * @stable ICU 2.0
2513 */
2514 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2515
2516 /**
2517 * Replace the characters in the range [`start`, `limit`)
2518 * with the characters in `srcText`. `srcText` is not modified.
2519 * @param start the offset at which the replace operation begins
2520 * @param limit the offset immediately following the replace range
2521 * @param srcText the source for the new characters
2522 * @return a reference to this
2523 * @stable ICU 2.0
2524 */
2525 inline UnicodeString& replaceBetween(int32_t start,
2526 int32_t limit,
2527 const UnicodeString& srcText);
2528
2529 /**
2530 * Replace the characters in the range [`start`, `limit`)
2531 * with the characters in `srcText` in the range
2532 * [`srcStart`, `srcLimit`). `srcText` is not modified.
2533 * @param start the offset at which the replace operation begins
2534 * @param limit the offset immediately following the replace range
2535 * @param srcText the source for the new characters
2536 * @param srcStart the offset into `srcText` where new characters
2537 * will be obtained
2538 * @param srcLimit the offset immediately following the range to copy
2539 * in `srcText`
2540 * @return a reference to this
2541 * @stable ICU 2.0
2542 */
2543 inline UnicodeString& replaceBetween(int32_t start,
2544 int32_t limit,
2545 const UnicodeString& srcText,
2546 int32_t srcStart,
2547 int32_t srcLimit);
2548
2549 /**
2550 * Replace a substring of this object with the given text.
2551 * @param start the beginning index, inclusive; `0 <= start <= limit`.
2552 * @param limit the ending index, exclusive; `start <= limit <= length()`.
2553 * @param text the text to replace characters `start` to `limit - 1`
2554 * @stable ICU 2.0
2555 */
2556 virtual void handleReplaceBetween(int32_t start,
2557 int32_t limit,
2558 const UnicodeString& text) override;
2559
2560 /**
2561 * Replaceable API
2562 * @return true if it has MetaData
2563 * @stable ICU 2.4
2564 */
2565 virtual UBool hasMetaData() const override;
2566
2567 /**
2568 * Copy a substring of this object, retaining attribute (out-of-band)
2569 * information. This method is used to duplicate or reorder substrings.
2570 * The destination index must not overlap the source range.
2571 *
2572 * @param start the beginning index, inclusive; `0 <= start <= limit`.
2573 * @param limit the ending index, exclusive; `start <= limit <= length()`.
2574 * @param dest the destination index. The characters from
2575 * `start..limit-1` will be copied to `dest`.
2576 * Implementations of this method may assume that `dest <= start ||
2577 * dest >= limit`.
2578 * @stable ICU 2.0
2579 */
2580 virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2581
2582 /* Search and replace operations */
2583
2584 /**
2585 * Replace all occurrences of characters in oldText with the characters
2586 * in newText
2587 * @param oldText the text containing the search text
2588 * @param newText the text containing the replacement text
2589 * @return a reference to this
2590 * @stable ICU 2.0
2591 */
2592 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2593 const UnicodeString& newText);
2594
2595 /**
2596 * Replace all occurrences of characters in oldText with characters
2597 * in newText
2598 * in the range [`start`, `start + length`).
2599 * @param start the start of the range in which replace will be performed
2600 * @param length the length of the range in which replace will be performed
2601 * @param oldText the text containing the search text
2602 * @param newText the text containing the replacement text
2603 * @return a reference to this
2604 * @stable ICU 2.0
2605 */
2606 inline UnicodeString& findAndReplace(int32_t start,
2607 int32_t length,
2608 const UnicodeString& oldText,
2609 const UnicodeString& newText);
2610
2611 /**
2612 * Replace all occurrences of characters in oldText in the range
2613 * [`oldStart`, `oldStart + oldLength`) with the characters
2614 * in newText in the range
2615 * [`newStart`, `newStart + newLength`)
2616 * in the range [`start`, `start + length`).
2617 * @param start the start of the range in which replace will be performed
2618 * @param length the length of the range in which replace will be performed
2619 * @param oldText the text containing the search text
2620 * @param oldStart the start of the search range in `oldText`
2621 * @param oldLength the length of the search range in `oldText`
2622 * @param newText the text containing the replacement text
2623 * @param newStart the start of the replacement range in `newText`
2624 * @param newLength the length of the replacement range in `newText`
2625 * @return a reference to this
2626 * @stable ICU 2.0
2627 */
2628 UnicodeString& findAndReplace(int32_t start,
2629 int32_t length,
2630 const UnicodeString& oldText,
2631 int32_t oldStart,
2632 int32_t oldLength,
2633 const UnicodeString& newText,
2634 int32_t newStart,
2635 int32_t newLength);
2636
2637
2638 /* Remove operations */
2639
2640 /**
2641 * Removes all characters from the UnicodeString object and clears the bogus flag.
2642 * This is the UnicodeString equivalent of std::string’s clear().
2643 *
2644 * @return a reference to this
2645 * @see setToBogus
2646 * @stable ICU 2.0
2647 */
2648 inline UnicodeString& remove();
2649
2650 /**
2651 * Remove the characters in the range
2652 * [`start`, `start + length`) from the UnicodeString object.
2653 * @param start the offset of the first character to remove
2654 * @param length the number of characters to remove
2655 * @return a reference to this
2656 * @stable ICU 2.0
2657 */
2658 inline UnicodeString& remove(int32_t start,
2659 int32_t length = static_cast<int32_t>(INT32_MAX));
2660
2661 /**
2662 * Remove the characters in the range
2663 * [`start`, `limit`) from the UnicodeString object.
2664 * @param start the offset of the first character to remove
2665 * @param limit the offset immediately following the range to remove
2666 * @return a reference to this
2667 * @stable ICU 2.0
2668 */
2669 inline UnicodeString& removeBetween(int32_t start,
2670 int32_t limit = static_cast<int32_t>(INT32_MAX));
2671
2672 /**
2673 * Retain only the characters in the range
2674 * [`start`, `limit`) from the UnicodeString object.
2675 * Removes characters before `start` and at and after `limit`.
2676 * @param start the offset of the first character to retain
2677 * @param limit the offset immediately following the range to retain
2678 * @return a reference to this
2679 * @stable ICU 4.4
2680 */
2681 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2682
2683 /* Length operations */
2684
2685 /**
2686 * Pad the start of this UnicodeString with the character `padChar`.
2687 * If the length of this UnicodeString is less than targetLength,
2688 * targetLength - length() copies of padChar will be added to the
2689 * beginning of this UnicodeString.
2690 * @param targetLength the desired length of the string
2691 * @param padChar the character to use for padding. Defaults to
2692 * space (U+0020)
2693 * @return true if the text was padded, false otherwise.
2694 * @stable ICU 2.0
2695 */
2696 UBool padLeading(int32_t targetLength,
2697 char16_t padChar = 0x0020);
2698
2699 /**
2700 * Pad the end of this UnicodeString with the character `padChar`.
2701 * If the length of this UnicodeString is less than targetLength,
2702 * targetLength - length() copies of padChar will be added to the
2703 * end of this UnicodeString.
2704 * @param targetLength the desired length of the string
2705 * @param padChar the character to use for padding. Defaults to
2706 * space (U+0020)
2707 * @return true if the text was padded, false otherwise.
2708 * @stable ICU 2.0
2709 */
2710 UBool padTrailing(int32_t targetLength,
2711 char16_t padChar = 0x0020);
2712
2713 /**
2714 * Truncate this UnicodeString to the `targetLength`.
2715 * @param targetLength the desired length of this UnicodeString.
2716 * @return true if the text was truncated, false otherwise
2717 * @stable ICU 2.0
2718 */
2719 inline UBool truncate(int32_t targetLength);
2720
2721 /**
2722 * Trims leading and trailing whitespace from this UnicodeString.
2723 * @return a reference to this
2724 * @stable ICU 2.0
2725 */
2726 UnicodeString& trim();
2727
2728 /* Miscellaneous operations */
2729
2730 /**
2731 * Reverse this UnicodeString in place.
2732 * @return a reference to this
2733 * @stable ICU 2.0
2734 */
2735 inline UnicodeString& reverse();
2736
2737 /**
2738 * Reverse the range [`start`, `start + length`) in
2739 * this UnicodeString.
2740 * @param start the start of the range to reverse
2741 * @param length the number of characters to reverse
2742 * @return a reference to this
2743 * @stable ICU 2.0
2744 */
2745 inline UnicodeString& reverse(int32_t start,
2746 int32_t length);
2747
2748 /**
2749 * Convert the characters in this to UPPER CASE following the conventions of
2750 * the default locale.
2751 * @return A reference to this.
2752 * @stable ICU 2.0
2753 */
2754 UnicodeString& toUpper();
2755
2756 /**
2757 * Convert the characters in this to UPPER CASE following the conventions of
2758 * a specific locale.
2759 * @param locale The locale containing the conventions to use.
2760 * @return A reference to this.
2761 * @stable ICU 2.0
2762 */
2763 UnicodeString& toUpper(const Locale& locale);
2764
2765 /**
2766 * Convert the characters in this to lower case following the conventions of
2767 * the default locale.
2768 * @return A reference to this.
2769 * @stable ICU 2.0
2770 */
2771 UnicodeString& toLower();
2772
2773 /**
2774 * Convert the characters in this to lower case following the conventions of
2775 * a specific locale.
2776 * @param locale The locale containing the conventions to use.
2777 * @return A reference to this.
2778 * @stable ICU 2.0
2779 */
2780 UnicodeString& toLower(const Locale& locale);
2781
2782 #if !UCONFIG_NO_BREAK_ITERATION
2783
2784 /**
2785 * Titlecase this string, convenience function using the default locale.
2786 *
2787 * Casing is locale-dependent and context-sensitive.
2788 * Titlecasing uses a break iterator to find the first characters of words
2789 * that are to be titlecased. It titlecases those characters and lowercases
2790 * all others.
2791 *
2792 * The titlecase break iterator can be provided to customize for arbitrary
2793 * styles, using rules and dictionaries beyond the standard iterators.
2794 * It may be more efficient to always provide an iterator to avoid
2795 * opening and closing one for each string.
2796 * If the break iterator passed in is null, the default Unicode algorithm
2797 * will be used to determine the titlecase positions.
2798 *
2799 * This function uses only the setText(), first() and next() methods of the
2800 * provided break iterator.
2801 *
2802 * @param titleIter A break iterator to find the first characters of words
2803 * that are to be titlecased.
2804 * If none is provided (0), then a standard titlecase
2805 * break iterator is opened.
2806 * Otherwise the provided iterator is set to the string's text.
2807 * @return A reference to this.
2808 * @stable ICU 2.1
2809 */
2810 UnicodeString &toTitle(BreakIterator *titleIter);
2811
2812 /**
2813 * Titlecase this string.
2814 *
2815 * Casing is locale-dependent and context-sensitive.
2816 * Titlecasing uses a break iterator to find the first characters of words
2817 * that are to be titlecased. It titlecases those characters and lowercases
2818 * all others.
2819 *
2820 * The titlecase break iterator can be provided to customize for arbitrary
2821 * styles, using rules and dictionaries beyond the standard iterators.
2822 * It may be more efficient to always provide an iterator to avoid
2823 * opening and closing one for each string.
2824 * If the break iterator passed in is null, the default Unicode algorithm
2825 * will be used to determine the titlecase positions.
2826 *
2827 * This function uses only the setText(), first() and next() methods of the
2828 * provided break iterator.
2829 *
2830 * @param titleIter A break iterator to find the first characters of words
2831 * that are to be titlecased.
2832 * If none is provided (0), then a standard titlecase
2833 * break iterator is opened.
2834 * Otherwise the provided iterator is set to the string's text.
2835 * @param locale The locale to consider.
2836 * @return A reference to this.
2837 * @stable ICU 2.1
2838 */
2839 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2840
2841 /**
2842 * Titlecase this string, with options.
2843 *
2844 * Casing is locale-dependent and context-sensitive.
2845 * Titlecasing uses a break iterator to find the first characters of words
2846 * that are to be titlecased. It titlecases those characters and lowercases
2847 * all others. (This can be modified with options.)
2848 *
2849 * The titlecase break iterator can be provided to customize for arbitrary
2850 * styles, using rules and dictionaries beyond the standard iterators.
2851 * It may be more efficient to always provide an iterator to avoid
2852 * opening and closing one for each string.
2853 * If the break iterator passed in is null, the default Unicode algorithm
2854 * will be used to determine the titlecase positions.
2855 *
2856 * This function uses only the setText(), first() and next() methods of the
2857 * provided break iterator.
2858 *
2859 * @param titleIter A break iterator to find the first characters of words
2860 * that are to be titlecased.
2861 * If none is provided (0), then a standard titlecase
2862 * break iterator is opened.
2863 * Otherwise the provided iterator is set to the string's text.
2864 * @param locale The locale to consider.
2865 * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2866 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2867 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2868 * @return A reference to this.
2869 * @stable ICU 3.8
2870 */
2871 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2872
2873 #endif
2874
2875 /**
2876 * Case-folds the characters in this string.
2877 *
2878 * Case-folding is locale-independent and not context-sensitive,
2879 * but there is an option for whether to include or exclude mappings for dotted I
2880 * and dotless i that are marked with 'T' in CaseFolding.txt.
2881 *
2882 * The result may be longer or shorter than the original.
2883 *
2884 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2885 * @return A reference to this.
2886 * @stable ICU 2.0
2887 */
2888 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2889
2890 //========================================
2891 // Access to the internal buffer
2892 //========================================
2893
2894 /**
2895 * Get a read/write pointer to the internal buffer.
2896 * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2897 * writable, and is still owned by the UnicodeString object.
2898 * Calls to getBuffer(minCapacity) must not be nested, and
2899 * must be matched with calls to releaseBuffer(newLength).
2900 * If the string buffer was read-only or shared,
2901 * then it will be reallocated and copied.
2902 *
2903 * An attempted nested call will return 0, and will not further modify the
2904 * state of the UnicodeString object.
2905 * It also returns 0 if the string is bogus.
2906 *
2907 * The actual capacity of the string buffer may be larger than minCapacity.
2908 * getCapacity() returns the actual capacity.
2909 * For many operations, the full capacity should be used to avoid reallocations.
2910 *
2911 * While the buffer is "open" between getBuffer(minCapacity)
2912 * and releaseBuffer(newLength), the following applies:
2913 * - The string length is set to 0.
2914 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2915 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2916 * - You can read from and write to the returned buffer.
2917 * - The previous string contents will still be in the buffer;
2918 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2919 * If the length() was greater than minCapacity, then any contents after minCapacity
2920 * may be lost.
2921 * The buffer contents is not NUL-terminated by getBuffer().
2922 * If length() < getCapacity() then you can terminate it by writing a NUL
2923 * at index length().
2924 * - You must call releaseBuffer(newLength) in order to
2925 * return to normal UnicodeString operation.
2926 *
2927 * @param minCapacity the minimum number of char16_ts that are to be available
2928 * in the buffer, starting at the returned pointer;
2929 * default to the current string capacity if minCapacity==-1
2930 * @return a writable pointer to the internal string buffer,
2931 * or nullptr if an error occurs (nested calls, out of memory)
2932 *
2933 * @see releaseBuffer
2934 * @see getTerminatedBuffer()
2935 * @stable ICU 2.0
2936 */
2937 char16_t *getBuffer(int32_t minCapacity);
2938
2939 /**
2940 * Release a read/write buffer on a UnicodeString object with an
2941 * "open" getBuffer(minCapacity).
2942 * This function must be called in a matched pair with getBuffer(minCapacity).
2943 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2944 *
2945 * It will set the string length to newLength, at most to the current capacity.
2946 * If newLength==-1 then it will set the length according to the
2947 * first NUL in the buffer, or to the capacity if there is no NUL.
2948 *
2949 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2950 *
2951 * @param newLength the new length of the UnicodeString object;
2952 * defaults to the current capacity if newLength is greater than that;
2953 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2954 * the current capacity of the string
2955 *
2956 * @see getBuffer(int32_t minCapacity)
2957 * @stable ICU 2.0
2958 */
2959 void releaseBuffer(int32_t newLength=-1);
2960
2961 /**
2962 * Get a read-only pointer to the internal buffer.
2963 * This can be called at any time on a valid UnicodeString.
2964 *
2965 * It returns 0 if the string is bogus, or
2966 * during an "open" getBuffer(minCapacity).
2967 *
2968 * It can be called as many times as desired.
2969 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2970 * at which time the pointer is semantically invalidated and must not be used any more.
2971 *
2972 * The capacity of the buffer can be determined with getCapacity().
2973 * The part after length() may or may not be initialized and valid,
2974 * depending on the history of the UnicodeString object.
2975 *
2976 * The buffer contents is (probably) not NUL-terminated.
2977 * You can check if it is with
2978 * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
2979 * (See getTerminatedBuffer().)
2980 *
2981 * The buffer may reside in read-only memory. Its contents must not
2982 * be modified.
2983 *
2984 * @return a read-only pointer to the internal string buffer,
2985 * or nullptr if the string is bogus or a getBuffer(minCapacity) is "open"
2986 *
2987 * @see getBuffer(int32_t minCapacity)
2988 * @see getTerminatedBuffer()
2989 * @stable ICU 2.0
2990 */
2991 inline const char16_t *getBuffer() const;
2992
2993 /**
2994 * Get a read-only pointer to the internal buffer,
2995 * making sure that it is NUL-terminated.
2996 * This can be called at any time on a valid UnicodeString.
2997 *
2998 * It returns 0 if the string is bogus, or
2999 * during an "open" getBuffer(minCapacity), or if the buffer cannot
3000 * be NUL-terminated (because memory allocation failed).
3001 *
3002 * It can be called as many times as desired.
3003 * The pointer that it returns will remain valid until the UnicodeString object is modified,
3004 * at which time the pointer is semantically invalidated and must not be used any more.
3005 *
3006 * The capacity of the buffer can be determined with getCapacity().
3007 * The part after length()+1 may or may not be initialized and valid,
3008 * depending on the history of the UnicodeString object.
3009 *
3010 * The buffer contents is guaranteed to be NUL-terminated.
3011 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
3012 * is written.
3013 * For this reason, this function is not const, unlike getBuffer().
3014 * Note that a UnicodeString may also contain NUL characters as part of its contents.
3015 *
3016 * The buffer may reside in read-only memory. Its contents must not
3017 * be modified.
3018 *
3019 * @return a read-only pointer to the internal string buffer,
3020 * or 0 if the string is bogus, a getBuffer(minCapacity) is "open",
3020 * or the buffer cannot be NUL-terminated
3021 *
3022 * @see getBuffer(int32_t minCapacity)
3023 * @see getBuffer()
3024 * @stable ICU 2.2
3025 */
3026 const char16_t *getTerminatedBuffer();
3027
3028 #ifndef U_HIDE_DRAFT_API
3029 /**
3030 * Converts to a std::u16string_view.
3031 *
3032 * @return a string view of the contents of this string
3033 * @draft ICU 76
3034 */
u16string_view()3035 inline operator std::u16string_view() const {
3036 return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3037 }
3038
3039 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3040 /**
3041 * Converts to a std::wstring_view.
3042 *
3043 * Note: This should remain draft until C++ standard plans
3044 * about char16_t vs. wchar_t become clearer.
3045 *
3046 * @return a string view of the contents of this string
3047 * @draft ICU 76
3048 */
wstring_view()3049 inline operator std::wstring_view() const {
3050 const char16_t *p = getBuffer();
3051 #ifdef U_ALIASING_BARRIER
3052 U_ALIASING_BARRIER(p);
3053 #endif
3054 return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
3055 }
3056 #endif // U_SIZEOF_WCHAR_T
3057 #endif // U_HIDE_DRAFT_API
3058
3059 //========================================
3060 // Constructors
3061 //========================================
3062
3063 /** Construct an empty UnicodeString.
3064 * @stable ICU 2.0
3065 */
3066 inline UnicodeString();
3067
3068 /**
3069 * Construct a UnicodeString with capacity to hold `capacity` char16_ts
3070 * @param capacity the number of char16_ts this UnicodeString should hold
3071 * before a resize is necessary; if count is greater than 0 and count
3072 * code points c take up more space than capacity, then capacity is adjusted
3073 * accordingly.
3074 * @param c is used to initially fill the string
3075 * @param count specifies how many code points c are to be written in the
3076 * string
3077 * @stable ICU 2.0
3078 */
3079 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3080
3081 /**
3082 * Single char16_t (code unit) constructor.
3083 *
3084 * It is recommended to mark this constructor "explicit" by
3085 * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
3086 * on the compiler command line or similar.
3087 * @param ch the character to place in the UnicodeString
3088 * @stable ICU 2.0
3089 */
3090 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
3091
3092 /**
3093 * Single UChar32 (code point) constructor.
3094 *
3095 * It is recommended to mark this constructor "explicit" by
3096 * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
3097 * on the compiler command line or similar.
3098 * @param ch the character to place in the UnicodeString
3099 * @stable ICU 2.0
3100 */
3101 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
3102
3103 #ifdef U_HIDE_DRAFT_API
3104 /**
3105 * char16_t* constructor.
3106 *
3107 * It is recommended to mark this constructor "explicit" by
3108 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3109 * on the compiler command line or similar.
3110 *
3111 * Note, for string literals:
3112 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3113 * length determination:
3114 * \code
3115 * UnicodeString str(u"literal");
3116 * if (str == u"other literal") { ... }
3117 * \endcode
3118 *
3119 * @param text The characters to place in the UnicodeString. `text`
3120 * must be NUL (U+0000) terminated.
3121 * @stable ICU 2.0
3122 */
UnicodeString(const char16_t * text)3123 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3124 UnicodeString(text, -1) {}
3125 #endif // U_HIDE_DRAFT_API
3126
3127 #if !U_CHAR16_IS_TYPEDEF && \
3128 (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
3129 /**
3130 * uint16_t * constructor.
3131 * Delegates to UnicodeString(const char16_t *).
3132 *
3133 * It is recommended to mark this constructor "explicit" by
3134 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3135 * on the compiler command line or similar.
3136 *
3137 * Note, for string literals:
3138 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3139 * length determination:
3140 * \code
3141 * UnicodeString str(u"literal");
3142 * if (str == u"other literal") { ... }
3143 * \endcode
3144 *
3145 * @param text NUL-terminated UTF-16 string
3146 * @stable ICU 59
3147 */
UnicodeString(const uint16_t * text)3148 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3149 UnicodeString(ConstChar16Ptr(text), -1) {}
3150 #endif
3151
3152 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
3153 /**
3154 * wchar_t * constructor.
3155 * (Only defined if U_SIZEOF_WCHAR_T==2.)
3156 * Delegates to UnicodeString(const char16_t *).
3157 *
3158 * It is recommended to mark this constructor "explicit" by
3159 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3160 * on the compiler command line or similar.
3161 *
3162 * Note, for string literals:
3163 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3164 * length determination:
3165 * \code
3166 * UnicodeString str(u"literal");
3167 * if (str == u"other literal") { ... }
3168 * \endcode
3169 *
3170 * @param text NUL-terminated UTF-16 string
3171 * @stable ICU 59
3172 */
UnicodeString(const wchar_t * text)3173 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3174 UnicodeString(ConstChar16Ptr(text), -1) {}
3175 #endif
3176
3177 /**
3178 * nullptr_t constructor.
3179 * Effectively the same as the default constructor, makes an empty string object.
3180 *
3181 * It is recommended to mark this constructor "explicit" by
3182 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3183 * on the compiler command line or similar.
3184 * @param text nullptr
3185 * @stable ICU 59
3186 */
3187 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3188
3189 /**
3190 * char16_t* constructor.
3191 *
3192 * Note, for string literals:
3193 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3194 * length determination:
3195 * \code
3196 * UnicodeString str(u"literal");
3197 * if (str == u"other literal") { ... }
3198 * \endcode
3199 *
3200 * @param text The characters to place in the UnicodeString.
3201 * @param textLength The number of Unicode characters in `text`
3202 * to copy.
3203 * @stable ICU 2.0
3204 */
3205 UnicodeString(const char16_t *text,
3206 int32_t textLength);
3207
3208 #if !U_CHAR16_IS_TYPEDEF
3209 /**
3210 * uint16_t * constructor.
3211 * Delegates to UnicodeString(const char16_t *, int32_t).
3212 *
3213 * Note, for string literals:
3214 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3215 * length determination:
3216 * \code
3217 * UnicodeString str(u"literal");
3218 * if (str == u"other literal") { ... }
3219 * \endcode
3220 *
3221 * @param text UTF-16 string
3222 * @param textLength string length
3223 * @stable ICU 59
3224 */
UnicodeString(const uint16_t * text,int32_t textLength)3225 UnicodeString(const uint16_t *text, int32_t textLength) :
3226 UnicodeString(ConstChar16Ptr(text), textLength) {}
3227 #endif
3228
3229 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3230 /**
3231 * wchar_t * constructor.
3232 * (Only defined if U_SIZEOF_WCHAR_T==2.)
3233 * Delegates to UnicodeString(const char16_t *, int32_t).
3234 *
3235 * Note, for string literals:
3236 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3237 * length determination:
3238 * \code
3239 * UnicodeString str(u"literal");
3240 * if (str == u"other literal") { ... }
3241 * \endcode
3242 *
3243 * @param text UTF-16 string
3244 * @param textLength string length
3245 * @stable ICU 59
3246 */
UnicodeString(const wchar_t * text,int32_t textLength)3247 UnicodeString(const wchar_t *text, int32_t textLength) :
3248 UnicodeString(ConstChar16Ptr(text), textLength) {}
3249 #endif
3250
3251 /**
3252 * nullptr_t constructor.
3253 * Effectively the same as the default constructor, makes an empty string object.
3254 * @param text nullptr
3255 * @param textLength ignored
3256 * @stable ICU 59
3257 */
3258 inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3259
3260 #ifndef U_HIDE_DRAFT_API
3261 /**
3262 * Constructor from `text`
3263 * which is, or which is implicitly convertible to,
3264 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
3265 * The string is bogus if the string view is too long.
3266 *
3267 * If you need a UnicodeString but need not copy the string view contents,
3268 * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
3269 *
3270 * @param text UTF-16 string
3271 * @draft ICU 76
3272 */
3273 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UnicodeString(const S & text)3274 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
3275 fUnion.fFields.fLengthAndFlags = kShortString;
3276 doAppend(internal::toU16StringViewNullable(text));
3277 }
3278 #endif // U_HIDE_DRAFT_API
3279
3280 /**
3281 * Readonly-aliasing char16_t* constructor.
3282 * The text will be used for the UnicodeString object, but
3283 * it will not be released when the UnicodeString is destroyed.
3284 * This has copy-on-write semantics:
3285 * When the string is modified, then the buffer is first copied into
3286 * newly allocated memory.
3287 * The aliased buffer is never modified.
3288 *
3289 * In an assignment to another UnicodeString, when using the copy constructor
3290 * or the assignment operator, the text will be copied.
3291 * When using fastCopyFrom(), the text will be aliased again,
3292 * so that both strings then alias the same readonly-text.
3293 *
3294 * Note, for string literals:
3295 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3296 * length determination:
3297 * \code
3298 * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
3299 * if (alias == u"other literal") { ... }
3300 * \endcode
3301 *
3302 * @param isTerminated specifies if `text` is `NUL`-terminated.
3303 * This must be true if `textLength==-1`.
3304 * @param text The characters to alias for the UnicodeString.
3305 * @param textLength The number of Unicode characters in `text` to alias.
3306 * If -1, then this constructor will determine the length
3307 * by calling `u_strlen()`.
3308 * @stable ICU 2.0
3309 */
3310 UnicodeString(UBool isTerminated,
3311 ConstChar16Ptr text,
3312 int32_t textLength);
3313
3314 /**
3315 * Writable-aliasing char16_t* constructor.
3316 * The text will be used for the UnicodeString object, but
3317 * it will not be released when the UnicodeString is destroyed.
3318 * This has write-through semantics:
3319 * For as long as the capacity of the buffer is sufficient, write operations
3320 * will directly affect the buffer. When more capacity is necessary, then
3321 * a new buffer will be allocated and the contents copied as with regularly
3322 * constructed strings.
3323 * In an assignment to another UnicodeString, the buffer will be copied.
3324 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3325 * as the string buffer itself and will in this case not copy the contents.
3326 *
3327 * @param buffer The characters to alias for the UnicodeString.
3328 * @param buffLength The number of Unicode characters in `buffer` to alias.
3329 * @param buffCapacity The size of `buffer` in char16_ts.
3330 * @stable ICU 2.0
3331 */
3332 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3333
3334 #if !U_CHAR16_IS_TYPEDEF
3335 /**
3336 * Writable-aliasing uint16_t * constructor.
3337 * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3338 * @param buffer writable buffer of/for UTF-16 text
3339 * @param buffLength length of the current buffer contents
3340 * @param buffCapacity buffer capacity
3341 * @stable ICU 59
3342 */
UnicodeString(uint16_t * buffer,int32_t buffLength,int32_t buffCapacity)3343 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3344 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3345 #endif
3346
3347 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3348 /**
3349 * Writable-aliasing wchar_t * constructor.
3350 * (Only defined if U_SIZEOF_WCHAR_T==2.)
3351 * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3352 * @param buffer writable buffer of/for UTF-16 text
3353 * @param buffLength length of the current buffer contents
3354 * @param buffCapacity buffer capacity
3355 * @stable ICU 59
3356 */
UnicodeString(wchar_t * buffer,int32_t buffLength,int32_t buffCapacity)3357 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3358 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3359 #endif
3360
3361 /**
3362 * Writable-aliasing nullptr_t constructor.
3363 * Effectively the same as the default constructor, makes an empty string object.
3364 * @param buffer nullptr
3365 * @param buffLength ignored
3366 * @param buffCapacity ignored
3367 * @stable ICU 59
3368 */
3369 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3370
3371 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3372
3373 /**
3374 * char* constructor.
3375 * Uses the default converter (and thus depends on the ICU conversion code)
3376 * unless U_CHARSET_IS_UTF8 is set to 1.
3377 *
3378 * For ASCII (really "invariant character") strings it is more efficient to use
3379 * the constructor that takes a US_INV (for its enum EInvariant).
3380 *
3381 * Note, for string literals:
3382 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3383 * length determination:
3384 * \code
3385 * UnicodeString str(u"literal");
3386 * if (str == u"other literal") { ... }
3387 * \endcode
3388 *
3389 * It is recommended to mark this constructor "explicit" by
3390 * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3391 * on the compiler command line or similar.
3392 * @param codepageData an array of bytes, null-terminated,
3393 * in the platform's default codepage.
3394 * @stable ICU 2.0
3395 */
3396 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3397
3398 /**
3399 * char* constructor.
3400 * Uses the default converter (and thus depends on the ICU conversion code)
3401 * unless U_CHARSET_IS_UTF8 is set to 1.
3402 * @param codepageData an array of bytes in the platform's default codepage.
3403 * @param dataLength The number of bytes in `codepageData`.
3404 * @stable ICU 2.0
3405 */
3406 UnicodeString(const char *codepageData, int32_t dataLength);
3407
3408 #endif
3409
3410 #if !UCONFIG_NO_CONVERSION
3411
3412 /**
3413 * char* constructor.
3414 * @param codepageData an array of bytes, null-terminated
3415 * @param codepage the encoding of `codepageData`. The special
3416 * value 0 for `codepage` indicates that the text is in the
3417 * platform's default codepage.
3418 *
3419 * If `codepage` is an empty string (`""`),
3420 * then a simple conversion is performed on the codepage-invariant
3421 * subset ("invariant characters") of the platform encoding. See utypes.h.
3422 * Recommendation: For invariant-character strings use the constructor
3423 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3424 * because it avoids object code dependencies of UnicodeString on
3425 * the conversion code.
3426 *
3427 * @stable ICU 2.0
3428 */
3429 UnicodeString(const char *codepageData, const char *codepage);
3430
3431 /**
3432 * char* constructor.
3433 * @param codepageData an array of bytes.
3434 * @param dataLength The number of bytes in `codepageData`.
3435 * @param codepage the encoding of `codepageData`. The special
3436 * value 0 for `codepage` indicates that the text is in the
3437 * platform's default codepage.
3438 * If `codepage` is an empty string (`""`),
3439 * then a simple conversion is performed on the codepage-invariant
3440 * subset ("invariant characters") of the platform encoding. See utypes.h.
3441 * Recommendation: For invariant-character strings use the constructor
3442 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3443 * because it avoids object code dependencies of UnicodeString on
3444 * the conversion code.
3445 *
3446 * @stable ICU 2.0
3447 */
3448 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3449
3450 /**
3451 * char * / UConverter constructor.
3452 * This constructor uses an existing UConverter object to
3453 * convert the codepage string to Unicode and construct a UnicodeString
3454 * from that.
3455 *
3456 * The converter is reset at first.
3457 * If the error code indicates a failure before this constructor is called,
3458 * or if an error occurs during conversion or construction,
3459 * then the string will be bogus.
3460 *
3461 * This function avoids the overhead of opening and closing a converter if
3462 * multiple strings are constructed.
3463 *
3464 * @param src input codepage string
3465 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3466 * @param cnv converter object (ucnv_resetToUnicode() will be called),
3467 * can be nullptr for the default converter
3468 * @param errorCode normal ICU error code
3469 * @stable ICU 2.0
3470 */
3471 UnicodeString(
3472 const char *src, int32_t srcLength,
3473 UConverter *cnv,
3474 UErrorCode &errorCode);
3475
3476 #endif
3477
3478 /**
3479 * Constructs a Unicode string from an invariant-character char * string.
3480 * About invariant characters see utypes.h.
3481 * This constructor has no runtime dependency on conversion code and is
3482 * therefore recommended over ones taking a charset name string
3483 * (where the empty string "" indicates invariant-character conversion).
3484 *
3485 * Use the macro US_INV as the third, signature-distinguishing parameter.
3486 *
3487 * For example:
3488 * \code
3489 * void fn(const char *s) {
3490 * UnicodeString ustr(s, -1, US_INV);
3491 * // use ustr ...
3492 * }
3493 * \endcode
3494 *
3495 * Note, for string literals:
3496 * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3497 * length determination:
3498 * \code
3499 * UnicodeString str(u"literal");
3500 * if (str == u"other literal") { ... }
3501 * \endcode
3502 *
3503 * @param src String using only invariant characters.
3504 * @param textLength Length of src, or -1 if NUL-terminated.
3505 * @param inv Signature-distinguishing parameter, use US_INV.
3506 *
3507 * @see US_INV
3508 * @stable ICU 3.2
3509 */
3510 UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3511
3512
3513 /**
3514 * Copy constructor.
3515 *
3516 * Starting with ICU 2.4, the assignment operator and the copy constructor
3517 * allocate a new buffer and copy the buffer contents even for readonly aliases.
3518 * By contrast, the fastCopyFrom() function implements the old,
3519 * more efficient but less safe behavior
3520 * of making this string also a readonly alias to the same buffer.
3521 *
3522 * If the source object has an "open" buffer from getBuffer(minCapacity),
3523 * then the copy is an empty string.
3524 *
3525 * @param that The UnicodeString object to copy.
3526 * @stable ICU 2.0
3527 * @see fastCopyFrom
3528 */
3529 UnicodeString(const UnicodeString& that);
3530
3531 /**
3532 * Move constructor; might leave src in bogus state.
3533 * This string will have the same contents and state that the source string had.
3534 * @param src source string
3535 * @stable ICU 56
3536 */
3537 UnicodeString(UnicodeString &&src) noexcept;
3538
3539 /**
3540 * 'Substring' constructor from tail of source string.
3541 * @param src The UnicodeString object to copy.
3542 * @param srcStart The offset into `src` at which to start copying.
3543 * @stable ICU 2.2
3544 */
3545 UnicodeString(const UnicodeString& src, int32_t srcStart);
3546
3547 /**
3548 * 'Substring' constructor from subrange of source string.
3549 * @param src The UnicodeString object to copy.
3550 * @param srcStart The offset into `src` at which to start copying.
3551 * @param srcLength The number of characters from `src` to copy.
3552 * @stable ICU 2.2
3553 */
3554 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3555
3556 /**
3557 * Clone this object, an instance of a subclass of Replaceable.
3558 * Clones can be used concurrently in multiple threads.
3559 * If a subclass does not implement clone(), or if an error occurs,
3560 * then nullptr is returned.
3561 * The caller must delete the clone.
3562 *
3563 * @return a clone of this object
3564 *
3565 * @see Replaceable::clone
3566 * @see getDynamicClassID
3567 * @stable ICU 2.6
3568 */
3569 virtual UnicodeString *clone() const override;
3570
3571 /** Destructor.
3572 * @stable ICU 2.0
3573 */
3574 virtual ~UnicodeString();
3575
3576 #ifndef U_HIDE_DRAFT_API
3577 /**
3578 * Readonly-aliasing factory method.
3579 * Aliases the same buffer as the input `text`
3580 * which is, or which is implicitly convertible to,
3581 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
3582 * The string is bogus if the string view is too long.
3583 *
3584 * The text will be used for the UnicodeString object, but
3585 * it will not be released when the UnicodeString is destroyed.
3586 * This has copy-on-write semantics:
3587 * When the string is modified, then the buffer is first copied into
3588 * newly allocated memory.
3589 * The aliased buffer is never modified.
3590 *
3591 * In an assignment to another UnicodeString, when using the copy constructor
3592 * or the assignment operator, the text will be copied.
3593 * When using fastCopyFrom(), the text will be aliased again,
3594 * so that both strings then alias the same readonly-text.
3595 *
3596 * @param text The string view to alias for the UnicodeString.
3597 * @draft ICU 76
3598 */
3599 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
readOnlyAlias(const S & text)3600 static inline UnicodeString readOnlyAlias(const S &text) {
3601 return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3602 }
3603
3604 /**
3605 * Readonly-aliasing factory method.
3606 * Aliases the same buffer as the input `text`.
3607 *
3608 * The text will be used for the UnicodeString object, but
3609 * it will not be released when the UnicodeString is destroyed.
3610 * This has copy-on-write semantics:
3611 * When the string is modified, then the buffer is first copied into
3612 * newly allocated memory.
3613 * The aliased buffer is never modified.
3614 *
3615 * In an assignment to another UnicodeString, when using the copy constructor
3616 * or the assignment operator, the text will be copied.
3617 * When using fastCopyFrom(), the text will be aliased again,
3618 * so that both strings then alias the same readonly-text.
3619 *
3620 * @param text The UnicodeString to alias.
3621 * @draft ICU 76
3622 */
static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
    // Thin public wrapper; the private helper produces the returned UnicodeString.
    return readOnlyAliasFromUnicodeString(text);
}
3626 #endif // U_HIDE_DRAFT_API
3627
3628 /**
3629 * Create a UnicodeString from a UTF-8 string.
3630 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3631 * Calls u_strFromUTF8WithSub().
3632 *
3633 * @param utf8 UTF-8 input string.
3634 * Note that a StringPiece can be implicitly constructed
3635 * from a std::string or a NUL-terminated const char * string.
3636 * @return A UnicodeString with equivalent UTF-16 contents.
3637 * @see toUTF8
3638 * @see toUTF8String
3639 * @stable ICU 4.2
3640 */
3641 static UnicodeString fromUTF8(StringPiece utf8);
3642
3643 /**
3644 * Create a UnicodeString from a UTF-32 string.
3645 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3646 * Calls u_strFromUTF32WithSub().
3647 *
3648 * @param utf32 UTF-32 input string. Must not be nullptr.
3649 * @param length Length of the input string, or -1 if NUL-terminated.
3650 * @return A UnicodeString with equivalent UTF-16 contents.
3651 * @see toUTF32
3652 * @stable ICU 4.2
3653 */
3654 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3655
3656 /* Miscellaneous operations */
3657
3658 /**
3659 * Unescape a string of characters and return a string containing
3660 * the result. The following escape sequences are recognized:
3661 *
3662 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3663 * \\Uhhhhhhhh 8 hex digits
3664 * \\xhh 1-2 hex digits
3665 * \\ooo 1-3 octal digits; o in [0-7]
3666 * \\cX control-X; X is masked with 0x1F
3667 *
3668 * as well as the standard ANSI C escapes:
3669 *
3670 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3671 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3672 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3673 *
3674 * Anything else following a backslash is generically escaped. For
3675 * example, "[a\\-z]" returns "[a-z]".
3676 *
3677 * If an escape sequence is ill-formed, this method returns an empty
3678 * string. An example of an ill-formed sequence is "\\u" followed by
3679 * fewer than 4 hex digits.
3680 *
3681 * This function is similar to u_unescape() but not identical to it.
3682 * The latter takes a source char*, so it does escape recognition
3683 * and also invariant conversion.
3684 *
3685 * @return a string with backslash escapes interpreted, or an
3686 * empty string on error.
3687 * @see UnicodeString#unescapeAt()
3688 * @see u_unescape()
3689 * @see u_unescapeAt()
3690 * @stable ICU 2.0
3691 */
3692 UnicodeString unescape() const;
3693
3694 /**
3695 * Unescape a single escape sequence and return the represented
3696 * character. See unescape() for a listing of the recognized escape
3697 * sequences. The character at offset-1 is assumed (without
3698 * checking) to be a backslash. If the escape sequence is
3699 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3700 * returned.
3701 *
3702 * @param offset an input output parameter. On input, it is the
3703 * offset into this string where the escape sequence is located,
3704 * after the initial backslash. On output, it is advanced after the
3705 * last character parsed. On error, it is not advanced at all.
3706 * @return the character represented by the escape sequence at
3707 * offset, or U_SENTINEL=-1 on error.
3708 * @see UnicodeString#unescape()
3709 * @see u_unescape()
3710 * @see u_unescapeAt()
3711 * @stable ICU 2.0
3712 */
3713 UChar32 unescapeAt(int32_t &offset) const;
3714
3715 /**
3716 * ICU "poor man's RTTI", returns a UClassID for this class.
3717 *
3718 * @stable ICU 2.2
3719 */
3720 static UClassID U_EXPORT2 getStaticClassID();
3721
3722 /**
3723 * ICU "poor man's RTTI", returns a UClassID for the actual class.
3724 *
3725 * @stable ICU 2.2
3726 */
3727 virtual UClassID getDynamicClassID() const override;
3728
3729 //========================================
3730 // Implementation methods
3731 //========================================
3732
3733 protected:
3734 /**
3735 * Implement Replaceable::getLength() (see jitterbug 1027).
3736 * @stable ICU 2.4
3737 */
3738 virtual int32_t getLength() const override;
3739
3740 /**
3741 * The change in Replaceable to use virtual getCharAt() allows
3742 * UnicodeString::charAt() to be inline again (see jitterbug 709).
3743 * @stable ICU 2.4
3744 */
3745 virtual char16_t getCharAt(int32_t offset) const override;
3746
3747 /**
3748 * The change in Replaceable to use virtual getChar32At() allows
3749 * UnicodeString::char32At() to be inline again (see jitterbug 709).
3750 * @stable ICU 2.4
3751 */
3752 virtual UChar32 getChar32At(int32_t offset) const override;
3753
3754 private:
3755 static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3756 static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3757
3758 // For char* constructors. Could be made public.
3759 UnicodeString &setToUTF8(StringPiece utf8);
3760 // For extract(char*).
3761 // We could make a toUTF8(target, capacity, errorCode) public but not
3762 // this version: New API will be cleaner if we make callers create substrings
3763 // rather than having start+length on every method,
3764 // and it should take a UErrorCode&.
3765 int32_t
3766 toUTF8(int32_t start, int32_t len,
3767 char *target, int32_t capacity) const;
3768
3769 /**
3770 * Internal string contents comparison, called by operator==.
3771 * Requires: this & text not bogus and have same lengths.
3772 */
doEquals(const UnicodeString & text,int32_t len)3773 inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3774 return doEquals(text.getArrayStart(), len);
3775 }
3776 UBool doEquals(const char16_t *text, int32_t len) const;
3777
3778 inline UBool
3779 doEqualsSubstring(int32_t start,
3780 int32_t length,
3781 const UnicodeString& srcText,
3782 int32_t srcStart,
3783 int32_t srcLength) const;
3784
3785 UBool doEqualsSubstring(int32_t start,
3786 int32_t length,
3787 const char16_t *srcChars,
3788 int32_t srcStart,
3789 int32_t srcLength) const;
3790
3791 inline int8_t
3792 doCompare(int32_t start,
3793 int32_t length,
3794 const UnicodeString& srcText,
3795 int32_t srcStart,
3796 int32_t srcLength) const;
3797
3798 int8_t doCompare(int32_t start,
3799 int32_t length,
3800 const char16_t *srcChars,
3801 int32_t srcStart,
3802 int32_t srcLength) const;
3803
3804 inline int8_t
3805 doCompareCodePointOrder(int32_t start,
3806 int32_t length,
3807 const UnicodeString& srcText,
3808 int32_t srcStart,
3809 int32_t srcLength) const;
3810
3811 int8_t doCompareCodePointOrder(int32_t start,
3812 int32_t length,
3813 const char16_t *srcChars,
3814 int32_t srcStart,
3815 int32_t srcLength) const;
3816
3817 inline int8_t
3818 doCaseCompare(int32_t start,
3819 int32_t length,
3820 const UnicodeString &srcText,
3821 int32_t srcStart,
3822 int32_t srcLength,
3823 uint32_t options) const;
3824
3825 int8_t
3826 doCaseCompare(int32_t start,
3827 int32_t length,
3828 const char16_t *srcChars,
3829 int32_t srcStart,
3830 int32_t srcLength,
3831 uint32_t options) const;
3832
3833 int32_t doIndexOf(char16_t c,
3834 int32_t start,
3835 int32_t length) const;
3836
3837 int32_t doIndexOf(UChar32 c,
3838 int32_t start,
3839 int32_t length) const;
3840
3841 int32_t doLastIndexOf(char16_t c,
3842 int32_t start,
3843 int32_t length) const;
3844
3845 int32_t doLastIndexOf(UChar32 c,
3846 int32_t start,
3847 int32_t length) const;
3848
3849 void doExtract(int32_t start,
3850 int32_t length,
3851 char16_t *dst,
3852 int32_t dstStart) const;
3853
3854 inline void doExtract(int32_t start,
3855 int32_t length,
3856 UnicodeString& target) const;
3857
3858 inline char16_t doCharAt(int32_t offset) const;
3859
3860 UnicodeString& doReplace(int32_t start,
3861 int32_t length,
3862 const UnicodeString& srcText,
3863 int32_t srcStart,
3864 int32_t srcLength);
3865
3866 UnicodeString& doReplace(int32_t start,
3867 int32_t length,
3868 const char16_t *srcChars,
3869 int32_t srcStart,
3870 int32_t srcLength);
3871 UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3872
3873 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3874 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3875 UnicodeString& doAppend(std::u16string_view src);
3876
3877 UnicodeString& doReverse(int32_t start,
3878 int32_t length);
3879
3880 // calculate hash code
3881 int32_t doHashCode() const;
3882
3883 // get pointer to start of array
3884 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3885 inline char16_t* getArrayStart();
3886 inline const char16_t* getArrayStart() const;
3887
3888 inline UBool hasShortLength() const;
3889 inline int32_t getShortLength() const;
3890
3891 // A UnicodeString object (not necessarily its current buffer)
3892 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3893 inline UBool isWritable() const;
3894
3895 // Is the current buffer writable?
3896 inline UBool isBufferWritable() const;
3897
3898 // None of the following does releaseArray().
3899 inline void setZeroLength();
3900 inline void setShortLength(int32_t len);
3901 inline void setLength(int32_t len);
3902 inline void setToEmpty();
3903 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3904
3905 // allocate the array; result may be the stack buffer
3906 // sets refCount to 1 if appropriate
3907 // sets fArray, fCapacity, and flags
3908 // sets length to 0
3909 // returns boolean for success or failure
3910 UBool allocate(int32_t capacity);
3911
3912 // release the array if owned
3913 void releaseArray();
3914
3915 // turn a bogus string into an empty one
3916 void unBogus();
3917
3918 // implements assignment operator, copy constructor, and fastCopyFrom()
3919 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=false);
3920
3921 // Copies just the fields without memory management.
3922 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3923
3924 // Pin start and limit to acceptable values.
3925 inline void pinIndex(int32_t& start) const;
3926 inline void pinIndices(int32_t& start,
3927 int32_t& length) const;
3928
3929 #if !UCONFIG_NO_CONVERSION
3930
3931 /* Internal extract() using UConverter. */
3932 int32_t doExtract(int32_t start, int32_t length,
3933 char *dest, int32_t destCapacity,
3934 UConverter *cnv,
3935 UErrorCode &errorCode) const;
3936
3937 /*
3938 * Real constructor for converting from codepage data.
3939 * It assumes that it is called with !fRefCounted.
3940 *
3941 * If `codepage==0`, then the default converter
3942 * is used for the platform encoding.
3943 * If `codepage` is an empty string (`""`),
3944 * then a simple conversion is performed on the codepage-invariant
3945 * subset ("invariant characters") of the platform encoding. See utypes.h.
3946 */
3947 void doCodepageCreate(const char *codepageData,
3948 int32_t dataLength,
3949 const char *codepage);
3950
3951 /*
3952 * Worker function for creating a UnicodeString from
3953 * a codepage string using a UConverter.
3954 */
3955 void
3956 doCodepageCreate(const char *codepageData,
3957 int32_t dataLength,
3958 UConverter *converter,
3959 UErrorCode &status);
3960
3961 #endif
3962
3963 /*
3964 * This function is called when write access to the array
3965 * is necessary.
3966 *
3967 * We need to make a copy of the array if
3968 * the buffer is read-only, or
3969 * the buffer is refCounted (shared), and refCount>1, or
3970 * the buffer is too small.
3971 *
3972 * Return false if memory could not be allocated.
3973 */
3974 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3975 int32_t growCapacity = -1,
3976 UBool doCopyArray = true,
3977 int32_t** pBufferToDelete = nullptr,
3978 UBool forceClone = false);
3979
3980 /**
3981 * Common function for UnicodeString case mappings.
3982 * The stringCaseMapper has the same type UStringCaseMapper
3983 * as in ustr_imp.h for ustrcase_map().
3984 */
3985 UnicodeString &
3986 caseMap(int32_t caseLocale, uint32_t options,
3987 #if !UCONFIG_NO_BREAK_ITERATION
3988 BreakIterator *iter,
3989 #endif
3990 UStringCaseMapper *stringCaseMapper);
3991
3992 // ref counting
3993 void addRef();
3994 int32_t removeRef();
3995 int32_t refCount() const;
3996
3997 // constants
3998 enum {
3999 /**
4000 * Size of stack buffer for short strings.
4001 * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
4002 * @see UNISTR_OBJECT_SIZE
4003 */
4004 US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4005 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4006 kInvalidHashCode=0, // invalid hash code
4007 kEmptyHashCode=1, // hash code for empty string
4008
4009 // bit flag values for fLengthAndFlags
4010 kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4011 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4012 kRefCounted=4, // there is a refCount field before the characters in fArray
4013 kBufferIsReadonly=8,// do not write to this buffer
4014 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4015 // and releaseBuffer(newLength) must be called
4016 kAllStorageFlags=0x1f,
4017
4018 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4019 kLength1=1<<kLengthShift,
4020 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
4021 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4022
4023 // combined values for convenience
4024 kShortString=kUsingStackBuffer,
4025 kLongString=kRefCounted,
4026 kReadonlyAlias=kBufferIsReadonly,
4027 kWritableAlias=0
4028 };
4029
4030 friend class UnicodeStringAppendable;
4031
4032 union StackBufferOrFields; // forward declaration necessary before friend declaration
4033 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4034
4035 /*
4036 * The following are all the class fields that are stored
4037 * in each UnicodeString object.
4038 * Note that UnicodeString has virtual functions,
4039 * therefore there is an implicit vtable pointer
4040 * as the first real field.
4041 * The fields should be aligned such that no padding is necessary.
4042 * On 32-bit machines, the size should be 32 bytes,
4043 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4044 *
4045 * We use a hack to achieve this.
4046 *
4047 * With at least some compilers, each of the following is forced to
4048 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4049 * rounded up with additional padding if the fields do not already fit that requirement:
4050 * - sizeof(class UnicodeString)
4051 * - offsetof(UnicodeString, fUnion)
4052 * - sizeof(fUnion)
4053 * - sizeof(fStackFields)
4054 *
4055 * We optimize for the longest possible internal buffer for short strings.
4056 * fUnion.fStackFields begins with 2 bytes for storage flags
4057 * and the length of relatively short strings,
4058 * followed by the buffer for short string contents.
4059 * There is no padding inside fStackFields.
4060 *
4061 * Heap-allocated and aliased strings use fUnion.fFields.
4062 * Both fStackFields and fFields must begin with the same fields for flags and short length,
4063 * that is, those must have the same memory offsets inside the object,
4064 * because the flags must be inspected in order to decide which half of fUnion is being used.
4065 * We assume that the compiler does not reorder the fields.
4066 *
4067 * (Padding at the end of fFields is ok:
4068 * As long as it is no larger than fStackFields, it is not wasted space.)
4069 *
4070 * For some of the history of the UnicodeString class fields layout, see
4071 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4072 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4073 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4074 */
4075 // (implicit) *vtable;
union StackBufferOrFields {
  // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
  // Each struct of the union must begin with fLengthAndFlags.
  // The inline accessors read the flags through fFields.fLengthAndFlags even while
  // the stack buffer is in use, which relies on this shared leading field.
  struct {
    int16_t fLengthAndFlags;          // bit fields: see constants above
    char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
  } fStackFields;
  struct {
    int16_t fLengthAndFlags;          // bit fields: see constants above
    int32_t fLength;    // number of characters in fArray if it exceeds kMaxShortLength
                        // (this is when setLength() stores it); else undefined
    int32_t fCapacity;  // capacity of fArray (in char16_ts)
    // array pointer last to minimize padding for machines with P128 data model
    // or pointer sizes that are not a power of 2
    char16_t   *fArray;    // the Unicode data
  } fFields;
} fUnion;
4092 };
4093
4094 /**
4095 * Creates a new UnicodeString from the concatenation of two others.
4096 *
4097 * @param s1 The first string to be copied to the new one.
4098 * @param s2 The second string to be copied to the new one, after s1.
4099 * @return UnicodeString(s1).append(s2)
4100 * @stable ICU 2.8
4101 */
4102 U_COMMON_API UnicodeString U_EXPORT2
4103 operator+ (const UnicodeString &s1, const UnicodeString &s2);
4104
4105 #ifndef U_HIDE_DRAFT_API
4106 /**
4107 * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
4108 * which is, or which is implicitly convertible to,
4109 * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
4110 *
4111 * @param s1 The string to be copied to the new one.
4112 * @param s2 The string view to be copied to the new string, after s1.
4113 * @return UnicodeString(s1).append(s2)
4114 * @draft ICU 76
4115 */
4116 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4117 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
4118 return unistr_internalConcat(s1, internal::toU16StringView(s2));
4119 }
4120 #endif // U_HIDE_DRAFT_API
4121
4122 #ifndef U_FORCE_HIDE_INTERNAL_API
/**
 * Concatenation helper that the templated operator+ (string-view-like
 * right-hand operand) forwards to.
 * NOTE(review): presumably returns a copy of s1 followed by s2, matching the
 * documented result of that operator+; the definition is not in this header.
 * @internal
 */
U_COMMON_API UnicodeString U_EXPORT2
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4126 #endif
4127
4128 //========================================
4129 // Inline members
4130 //========================================
4131
4132 //========================================
4133 // Privates
4134 //========================================
4135
4136 inline void
pinIndex(int32_t & start)4137 UnicodeString::pinIndex(int32_t& start) const
4138 {
4139 // pin index
4140 if(start < 0) {
4141 start = 0;
4142 } else if(start > length()) {
4143 start = length();
4144 }
4145 }
4146
4147 inline void
pinIndices(int32_t & start,int32_t & _length)4148 UnicodeString::pinIndices(int32_t& start,
4149 int32_t& _length) const
4150 {
4151 // pin indices
4152 int32_t len = length();
4153 if(start < 0) {
4154 start = 0;
4155 } else if(start > len) {
4156 start = len;
4157 }
4158 if(_length < 0) {
4159 _length = 0;
4160 } else if(_length > (len - start)) {
4161 _length = (len - start);
4162 }
4163 }
4164
4165 inline char16_t*
getArrayStart()4166 UnicodeString::getArrayStart() {
4167 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4168 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4169 }
4170
4171 inline const char16_t*
getArrayStart()4172 UnicodeString::getArrayStart() const {
4173 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4174 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4175 }
4176
4177 //========================================
4178 // Default constructor
4179 //========================================
4180
4181 inline
UnicodeString()4182 UnicodeString::UnicodeString() {
4183 fUnion.fStackFields.fLengthAndFlags=kShortString;
4184 }
4185
UnicodeString(const std::nullptr_t)4186 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4187 fUnion.fStackFields.fLengthAndFlags=kShortString;
4188 }
4189
UnicodeString(const std::nullptr_t,int32_t)4190 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4191 fUnion.fStackFields.fLengthAndFlags=kShortString;
4192 }
4193
UnicodeString(std::nullptr_t,int32_t,int32_t)4194 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4195 fUnion.fStackFields.fLengthAndFlags=kShortString;
4196 }
4197
4198 //========================================
4199 // Read-only implementation methods
4200 //========================================
4201 inline UBool
hasShortLength()4202 UnicodeString::hasShortLength() const {
4203 return fUnion.fFields.fLengthAndFlags>=0;
4204 }
4205
4206 inline int32_t
getShortLength()4207 UnicodeString::getShortLength() const {
4208 // fLengthAndFlags must be non-negative -> short length >= 0
4209 // and arithmetic or logical shift does not matter.
4210 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4211 }
4212
4213 inline int32_t
length()4214 UnicodeString::length() const {
4215 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4216 }
4217
4218 inline int32_t
getCapacity()4219 UnicodeString::getCapacity() const {
4220 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4221 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4222 }
4223
4224 inline int32_t
hashCode()4225 UnicodeString::hashCode() const
4226 { return doHashCode(); }
4227
4228 inline UBool
isBogus()4229 UnicodeString::isBogus() const
4230 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4231
4232 inline UBool
isWritable()4233 UnicodeString::isWritable() const
4234 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4235
4236 inline UBool
isBufferWritable()4237 UnicodeString::isBufferWritable() const
4238 {
4239 return
4240 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4241 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4242 }
4243
4244 inline const char16_t *
getBuffer()4245 UnicodeString::getBuffer() const {
4246 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4247 return nullptr;
4248 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4249 return fUnion.fStackFields.fBuffer;
4250 } else {
4251 return fUnion.fFields.fArray;
4252 }
4253 }
4254
4255 //========================================
4256 // Read-only alias methods
4257 //========================================
4258 inline int8_t
doCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4259 UnicodeString::doCompare(int32_t start,
4260 int32_t thisLength,
4261 const UnicodeString& srcText,
4262 int32_t srcStart,
4263 int32_t srcLength) const
4264 {
4265 if(srcText.isBogus()) {
4266 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4267 } else {
4268 srcText.pinIndices(srcStart, srcLength);
4269 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4270 }
4271 }
4272
4273 inline UBool
doEqualsSubstring(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4274 UnicodeString::doEqualsSubstring(int32_t start,
4275 int32_t thisLength,
4276 const UnicodeString& srcText,
4277 int32_t srcStart,
4278 int32_t srcLength) const
4279 {
4280 if(srcText.isBogus()) {
4281 return isBogus();
4282 } else {
4283 srcText.pinIndices(srcStart, srcLength);
4284 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4285 }
4286 }
4287
4288 inline bool
4289 UnicodeString::operator== (const UnicodeString& text) const
4290 {
4291 if(isBogus()) {
4292 return text.isBogus();
4293 } else {
4294 int32_t len = length(), textLength = text.length();
4295 return !text.isBogus() && len == textLength && doEquals(text, len);
4296 }
4297 }
4298
4299 inline bool
4300 UnicodeString::operator!= (const UnicodeString& text) const
4301 { return (! operator==(text)); }
4302
4303 inline UBool
4304 UnicodeString::operator> (const UnicodeString& text) const
4305 { return doCompare(0, length(), text, 0, text.length()) == 1; }
4306
4307 inline UBool
4308 UnicodeString::operator< (const UnicodeString& text) const
4309 { return doCompare(0, length(), text, 0, text.length()) == -1; }
4310
4311 inline UBool
4312 UnicodeString::operator>= (const UnicodeString& text) const
4313 { return doCompare(0, length(), text, 0, text.length()) != -1; }
4314
4315 inline UBool
4316 UnicodeString::operator<= (const UnicodeString& text) const
4317 { return doCompare(0, length(), text, 0, text.length()) != 1; }
4318
4319 inline int8_t
compare(const UnicodeString & text)4320 UnicodeString::compare(const UnicodeString& text) const
4321 { return doCompare(0, length(), text, 0, text.length()); }
4322
4323 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText)4324 UnicodeString::compare(int32_t start,
4325 int32_t _length,
4326 const UnicodeString& srcText) const
4327 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4328
4329 inline int8_t
compare(ConstChar16Ptr srcChars,int32_t srcLength)4330 UnicodeString::compare(ConstChar16Ptr srcChars,
4331 int32_t srcLength) const
4332 { return doCompare(0, length(), srcChars, 0, srcLength); }
4333
4334 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4335 UnicodeString::compare(int32_t start,
4336 int32_t _length,
4337 const UnicodeString& srcText,
4338 int32_t srcStart,
4339 int32_t srcLength) const
4340 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4341
4342 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars)4343 UnicodeString::compare(int32_t start,
4344 int32_t _length,
4345 const char16_t *srcChars) const
4346 { return doCompare(start, _length, srcChars, 0, _length); }
4347
4348 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4349 UnicodeString::compare(int32_t start,
4350 int32_t _length,
4351 const char16_t *srcChars,
4352 int32_t srcStart,
4353 int32_t srcLength) const
4354 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4355
4356 inline int8_t
compareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4357 UnicodeString::compareBetween(int32_t start,
4358 int32_t limit,
4359 const UnicodeString& srcText,
4360 int32_t srcStart,
4361 int32_t srcLimit) const
4362 { return doCompare(start, limit - start,
4363 srcText, srcStart, srcLimit - srcStart); }
4364
4365 inline int8_t
doCompareCodePointOrder(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4366 UnicodeString::doCompareCodePointOrder(int32_t start,
4367 int32_t thisLength,
4368 const UnicodeString& srcText,
4369 int32_t srcStart,
4370 int32_t srcLength) const
4371 {
4372 if(srcText.isBogus()) {
4373 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4374 } else {
4375 srcText.pinIndices(srcStart, srcLength);
4376 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4377 }
4378 }
4379
4380 inline int8_t
compareCodePointOrder(const UnicodeString & text)4381 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4382 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4383
4384 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText)4385 UnicodeString::compareCodePointOrder(int32_t start,
4386 int32_t _length,
4387 const UnicodeString& srcText) const
4388 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4389
4390 inline int8_t
compareCodePointOrder(ConstChar16Ptr srcChars,int32_t srcLength)4391 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4392 int32_t srcLength) const
4393 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4394
4395 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4396 UnicodeString::compareCodePointOrder(int32_t start,
4397 int32_t _length,
4398 const UnicodeString& srcText,
4399 int32_t srcStart,
4400 int32_t srcLength) const
4401 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4402
4403 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars)4404 UnicodeString::compareCodePointOrder(int32_t start,
4405 int32_t _length,
4406 const char16_t *srcChars) const
4407 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4408
4409 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4410 UnicodeString::compareCodePointOrder(int32_t start,
4411 int32_t _length,
4412 const char16_t *srcChars,
4413 int32_t srcStart,
4414 int32_t srcLength) const
4415 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4416
4417 inline int8_t
compareCodePointOrderBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4418 UnicodeString::compareCodePointOrderBetween(int32_t start,
4419 int32_t limit,
4420 const UnicodeString& srcText,
4421 int32_t srcStart,
4422 int32_t srcLimit) const
4423 { return doCompareCodePointOrder(start, limit - start,
4424 srcText, srcStart, srcLimit - srcStart); }
4425
4426 inline int8_t
doCaseCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4427 UnicodeString::doCaseCompare(int32_t start,
4428 int32_t thisLength,
4429 const UnicodeString &srcText,
4430 int32_t srcStart,
4431 int32_t srcLength,
4432 uint32_t options) const
4433 {
4434 if(srcText.isBogus()) {
4435 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4436 } else {
4437 srcText.pinIndices(srcStart, srcLength);
4438 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4439 }
4440 }
4441
4442 inline int8_t
caseCompare(const UnicodeString & text,uint32_t options)4443 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4444 return doCaseCompare(0, length(), text, 0, text.length(), options);
4445 }
4446
4447 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,uint32_t options)4448 UnicodeString::caseCompare(int32_t start,
4449 int32_t _length,
4450 const UnicodeString &srcText,
4451 uint32_t options) const {
4452 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4453 }
4454
4455 inline int8_t
caseCompare(ConstChar16Ptr srcChars,int32_t srcLength,uint32_t options)4456 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4457 int32_t srcLength,
4458 uint32_t options) const {
4459 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4460 }
4461
4462 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4463 UnicodeString::caseCompare(int32_t start,
4464 int32_t _length,
4465 const UnicodeString &srcText,
4466 int32_t srcStart,
4467 int32_t srcLength,
4468 uint32_t options) const {
4469 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4470 }
4471
4472 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,uint32_t options)4473 UnicodeString::caseCompare(int32_t start,
4474 int32_t _length,
4475 const char16_t *srcChars,
4476 uint32_t options) const {
4477 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4478 }
4479
4480 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options)4481 UnicodeString::caseCompare(int32_t start,
4482 int32_t _length,
4483 const char16_t *srcChars,
4484 int32_t srcStart,
4485 int32_t srcLength,
4486 uint32_t options) const {
4487 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4488 }
4489
4490 inline int8_t
caseCompareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit,uint32_t options)4491 UnicodeString::caseCompareBetween(int32_t start,
4492 int32_t limit,
4493 const UnicodeString &srcText,
4494 int32_t srcStart,
4495 int32_t srcLimit,
4496 uint32_t options) const {
4497 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4498 }
4499
4500 inline int32_t
indexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4501 UnicodeString::indexOf(const UnicodeString& srcText,
4502 int32_t srcStart,
4503 int32_t srcLength,
4504 int32_t start,
4505 int32_t _length) const
4506 {
4507 if(!srcText.isBogus()) {
4508 srcText.pinIndices(srcStart, srcLength);
4509 if(srcLength > 0) {
4510 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4511 }
4512 }
4513 return -1;
4514 }
4515
4516 inline int32_t
indexOf(const UnicodeString & text)4517 UnicodeString::indexOf(const UnicodeString& text) const
4518 { return indexOf(text, 0, text.length(), 0, length()); }
4519
4520 inline int32_t
indexOf(const UnicodeString & text,int32_t start)4521 UnicodeString::indexOf(const UnicodeString& text,
4522 int32_t start) const {
4523 pinIndex(start);
4524 return indexOf(text, 0, text.length(), start, length() - start);
4525 }
4526
4527 inline int32_t
indexOf(const UnicodeString & text,int32_t start,int32_t _length)4528 UnicodeString::indexOf(const UnicodeString& text,
4529 int32_t start,
4530 int32_t _length) const
4531 { return indexOf(text, 0, text.length(), start, _length); }
4532
4533 inline int32_t
indexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4534 UnicodeString::indexOf(const char16_t *srcChars,
4535 int32_t srcLength,
4536 int32_t start) const {
4537 pinIndex(start);
4538 return indexOf(srcChars, 0, srcLength, start, length() - start);
4539 }
4540
4541 inline int32_t
indexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4542 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4543 int32_t srcLength,
4544 int32_t start,
4545 int32_t _length) const
4546 { return indexOf(srcChars, 0, srcLength, start, _length); }
4547
4548 inline int32_t
indexOf(char16_t c,int32_t start,int32_t _length)4549 UnicodeString::indexOf(char16_t c,
4550 int32_t start,
4551 int32_t _length) const
4552 { return doIndexOf(c, start, _length); }
4553
4554 inline int32_t
indexOf(UChar32 c,int32_t start,int32_t _length)4555 UnicodeString::indexOf(UChar32 c,
4556 int32_t start,
4557 int32_t _length) const
4558 { return doIndexOf(c, start, _length); }
4559
4560 inline int32_t
indexOf(char16_t c)4561 UnicodeString::indexOf(char16_t c) const
4562 { return doIndexOf(c, 0, length()); }
4563
4564 inline int32_t
indexOf(UChar32 c)4565 UnicodeString::indexOf(UChar32 c) const
4566 { return indexOf(c, 0, length()); }
4567
4568 inline int32_t
indexOf(char16_t c,int32_t start)4569 UnicodeString::indexOf(char16_t c,
4570 int32_t start) const {
4571 pinIndex(start);
4572 return doIndexOf(c, start, length() - start);
4573 }
4574
4575 inline int32_t
indexOf(UChar32 c,int32_t start)4576 UnicodeString::indexOf(UChar32 c,
4577 int32_t start) const {
4578 pinIndex(start);
4579 return indexOf(c, start, length() - start);
4580 }
4581
4582 inline int32_t
lastIndexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4583 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4584 int32_t srcLength,
4585 int32_t start,
4586 int32_t _length) const
4587 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4588
4589 inline int32_t
lastIndexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4590 UnicodeString::lastIndexOf(const char16_t *srcChars,
4591 int32_t srcLength,
4592 int32_t start) const {
4593 pinIndex(start);
4594 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4595 }
4596
4597 inline int32_t
lastIndexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4598 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4599 int32_t srcStart,
4600 int32_t srcLength,
4601 int32_t start,
4602 int32_t _length) const
4603 {
4604 if(!srcText.isBogus()) {
4605 srcText.pinIndices(srcStart, srcLength);
4606 if(srcLength > 0) {
4607 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4608 }
4609 }
4610 return -1;
4611 }
4612
4613 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start,int32_t _length)4614 UnicodeString::lastIndexOf(const UnicodeString& text,
4615 int32_t start,
4616 int32_t _length) const
4617 { return lastIndexOf(text, 0, text.length(), start, _length); }
4618
4619 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start)4620 UnicodeString::lastIndexOf(const UnicodeString& text,
4621 int32_t start) const {
4622 pinIndex(start);
4623 return lastIndexOf(text, 0, text.length(), start, length() - start);
4624 }
4625
4626 inline int32_t
lastIndexOf(const UnicodeString & text)4627 UnicodeString::lastIndexOf(const UnicodeString& text) const
4628 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4629
4630 inline int32_t
lastIndexOf(char16_t c,int32_t start,int32_t _length)4631 UnicodeString::lastIndexOf(char16_t c,
4632 int32_t start,
4633 int32_t _length) const
4634 { return doLastIndexOf(c, start, _length); }
4635
4636 inline int32_t
lastIndexOf(UChar32 c,int32_t start,int32_t _length)4637 UnicodeString::lastIndexOf(UChar32 c,
4638 int32_t start,
4639 int32_t _length) const {
4640 return doLastIndexOf(c, start, _length);
4641 }
4642
4643 inline int32_t
lastIndexOf(char16_t c)4644 UnicodeString::lastIndexOf(char16_t c) const
4645 { return doLastIndexOf(c, 0, length()); }
4646
4647 inline int32_t
lastIndexOf(UChar32 c)4648 UnicodeString::lastIndexOf(UChar32 c) const {
4649 return lastIndexOf(c, 0, length());
4650 }
4651
4652 inline int32_t
lastIndexOf(char16_t c,int32_t start)4653 UnicodeString::lastIndexOf(char16_t c,
4654 int32_t start) const {
4655 pinIndex(start);
4656 return doLastIndexOf(c, start, length() - start);
4657 }
4658
4659 inline int32_t
lastIndexOf(UChar32 c,int32_t start)4660 UnicodeString::lastIndexOf(UChar32 c,
4661 int32_t start) const {
4662 pinIndex(start);
4663 return lastIndexOf(c, start, length() - start);
4664 }
4665
4666 inline UBool
startsWith(const UnicodeString & text)4667 UnicodeString::startsWith(const UnicodeString& text) const
4668 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4669
4670 inline UBool
startsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4671 UnicodeString::startsWith(const UnicodeString& srcText,
4672 int32_t srcStart,
4673 int32_t srcLength) const
4674 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4675
4676 inline UBool
startsWith(ConstChar16Ptr srcChars,int32_t srcLength)4677 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4678 if(srcLength < 0) {
4679 srcLength = u_strlen(toUCharPtr(srcChars));
4680 }
4681 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4682 }
4683
4684 inline UBool
startsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4685 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4686 if(srcLength < 0) {
4687 srcLength = u_strlen(toUCharPtr(srcChars));
4688 }
4689 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4690 }
4691
4692 inline UBool
endsWith(const UnicodeString & text)4693 UnicodeString::endsWith(const UnicodeString& text) const
4694 { return doEqualsSubstring(length() - text.length(), text.length(),
4695 text, 0, text.length()); }
4696
4697 inline UBool
endsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4698 UnicodeString::endsWith(const UnicodeString& srcText,
4699 int32_t srcStart,
4700 int32_t srcLength) const {
4701 srcText.pinIndices(srcStart, srcLength);
4702 return doEqualsSubstring(length() - srcLength, srcLength,
4703 srcText, srcStart, srcLength);
4704 }
4705
4706 inline UBool
endsWith(ConstChar16Ptr srcChars,int32_t srcLength)4707 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4708 int32_t srcLength) const {
4709 if(srcLength < 0) {
4710 srcLength = u_strlen(toUCharPtr(srcChars));
4711 }
4712 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4713 }
4714
4715 inline UBool
endsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4716 UnicodeString::endsWith(const char16_t *srcChars,
4717 int32_t srcStart,
4718 int32_t srcLength) const {
4719 if(srcLength < 0) {
4720 srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4721 }
4722 return doEqualsSubstring(length() - srcLength, srcLength,
4723 srcChars, srcStart, srcLength);
4724 }
4725
4726 //========================================
4727 // replace
4728 //========================================
4729 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText)4730 UnicodeString::replace(int32_t start,
4731 int32_t _length,
4732 const UnicodeString& srcText)
4733 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4734
4735 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4736 UnicodeString::replace(int32_t start,
4737 int32_t _length,
4738 const UnicodeString& srcText,
4739 int32_t srcStart,
4740 int32_t srcLength)
4741 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4742
4743 inline UnicodeString&
replace(int32_t start,int32_t _length,ConstChar16Ptr srcChars,int32_t srcLength)4744 UnicodeString::replace(int32_t start,
4745 int32_t _length,
4746 ConstChar16Ptr srcChars,
4747 int32_t srcLength)
4748 { return doReplace(start, _length, srcChars, 0, srcLength); }
4749
4750 inline UnicodeString&
replace(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4751 UnicodeString::replace(int32_t start,
4752 int32_t _length,
4753 const char16_t *srcChars,
4754 int32_t srcStart,
4755 int32_t srcLength)
4756 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4757
4758 inline UnicodeString&
replace(int32_t start,int32_t _length,char16_t srcChar)4759 UnicodeString::replace(int32_t start,
4760 int32_t _length,
4761 char16_t srcChar)
4762 { return doReplace(start, _length, &srcChar, 0, 1); }
4763
4764 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText)4765 UnicodeString::replaceBetween(int32_t start,
4766 int32_t limit,
4767 const UnicodeString& srcText)
4768 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4769
4770 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4771 UnicodeString::replaceBetween(int32_t start,
4772 int32_t limit,
4773 const UnicodeString& srcText,
4774 int32_t srcStart,
4775 int32_t srcLimit)
4776 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4777
4778 inline UnicodeString&
findAndReplace(const UnicodeString & oldText,const UnicodeString & newText)4779 UnicodeString::findAndReplace(const UnicodeString& oldText,
4780 const UnicodeString& newText)
4781 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4782 newText, 0, newText.length()); }
4783
4784 inline UnicodeString&
findAndReplace(int32_t start,int32_t _length,const UnicodeString & oldText,const UnicodeString & newText)4785 UnicodeString::findAndReplace(int32_t start,
4786 int32_t _length,
4787 const UnicodeString& oldText,
4788 const UnicodeString& newText)
4789 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4790 newText, 0, newText.length()); }
4791
4792 // ============================
4793 // extract
4794 // ============================
4795 inline void
doExtract(int32_t start,int32_t _length,UnicodeString & target)4796 UnicodeString::doExtract(int32_t start,
4797 int32_t _length,
4798 UnicodeString& target) const
4799 { target.replace(0, target.length(), *this, start, _length); }
4800
4801 inline void
extract(int32_t start,int32_t _length,Char16Ptr target,int32_t targetStart)4802 UnicodeString::extract(int32_t start,
4803 int32_t _length,
4804 Char16Ptr target,
4805 int32_t targetStart) const
4806 { doExtract(start, _length, target, targetStart); }
4807
4808 inline void
extract(int32_t start,int32_t _length,UnicodeString & target)4809 UnicodeString::extract(int32_t start,
4810 int32_t _length,
4811 UnicodeString& target) const
4812 { doExtract(start, _length, target); }
4813
4814 #if !UCONFIG_NO_CONVERSION
4815
4816 inline int32_t
extract(int32_t start,int32_t _length,char * dst,const char * codepage)4817 UnicodeString::extract(int32_t start,
4818 int32_t _length,
4819 char *dst,
4820 const char *codepage) const
4821
4822 {
4823 // This dstSize value will be checked explicitly
4824 return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4825 }
4826
4827 #endif
4828
4829 inline void
extractBetween(int32_t start,int32_t limit,char16_t * dst,int32_t dstStart)4830 UnicodeString::extractBetween(int32_t start,
4831 int32_t limit,
4832 char16_t *dst,
4833 int32_t dstStart) const {
4834 pinIndex(start);
4835 pinIndex(limit);
4836 doExtract(start, limit - start, dst, dstStart);
4837 }
4838
4839 inline UnicodeString
tempSubStringBetween(int32_t start,int32_t limit)4840 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4841 return tempSubString(start, limit - start);
4842 }
4843
4844 inline char16_t
doCharAt(int32_t offset)4845 UnicodeString::doCharAt(int32_t offset) const
4846 {
4847 if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4848 return getArrayStart()[offset];
4849 } else {
4850 return kInvalidUChar;
4851 }
4852 }
4853
4854 inline char16_t
charAt(int32_t offset)4855 UnicodeString::charAt(int32_t offset) const
4856 { return doCharAt(offset); }
4857
4858 inline char16_t
4859 UnicodeString::operator[] (int32_t offset) const
4860 { return doCharAt(offset); }
4861
4862 inline UBool
isEmpty()4863 UnicodeString::isEmpty() const {
4864 // Arithmetic or logical right shift does not matter: only testing for 0.
4865 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4866 }
4867
4868 //========================================
4869 // Write implementation methods
4870 //========================================
4871 inline void
setZeroLength()4872 UnicodeString::setZeroLength() {
4873 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4874 }
4875
4876 inline void
setShortLength(int32_t len)4877 UnicodeString::setShortLength(int32_t len) {
4878 // requires 0 <= len <= kMaxShortLength
4879 fUnion.fFields.fLengthAndFlags =
4880 static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4881 }
4882
4883 inline void
setLength(int32_t len)4884 UnicodeString::setLength(int32_t len) {
4885 if(len <= kMaxShortLength) {
4886 setShortLength(len);
4887 } else {
4888 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4889 fUnion.fFields.fLength = len;
4890 }
4891 }
4892
4893 inline void
setToEmpty()4894 UnicodeString::setToEmpty() {
4895 fUnion.fFields.fLengthAndFlags = kShortString;
4896 }
4897
4898 inline void
setArray(char16_t * array,int32_t len,int32_t capacity)4899 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4900 setLength(len);
4901 fUnion.fFields.fArray = array;
4902 fUnion.fFields.fCapacity = capacity;
4903 }
4904
4905 inline UnicodeString&
4906 UnicodeString::operator= (char16_t ch)
4907 { return doReplace(0, length(), &ch, 0, 1); }
4908
4909 inline UnicodeString&
4910 UnicodeString::operator= (UChar32 ch)
4911 { return replace(0, length(), ch); }
4912
4913 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4914 UnicodeString::setTo(const UnicodeString& srcText,
4915 int32_t srcStart,
4916 int32_t srcLength)
4917 {
4918 unBogus();
4919 return doReplace(0, length(), srcText, srcStart, srcLength);
4920 }
4921
4922 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart)4923 UnicodeString::setTo(const UnicodeString& srcText,
4924 int32_t srcStart)
4925 {
4926 unBogus();
4927 srcText.pinIndex(srcStart);
4928 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4929 }
4930
4931 inline UnicodeString&
setTo(const UnicodeString & srcText)4932 UnicodeString::setTo(const UnicodeString& srcText)
4933 {
4934 return copyFrom(srcText);
4935 }
4936
4937 inline UnicodeString&
setTo(const char16_t * srcChars,int32_t srcLength)4938 UnicodeString::setTo(const char16_t *srcChars,
4939 int32_t srcLength)
4940 {
4941 unBogus();
4942 return doReplace(0, length(), srcChars, 0, srcLength);
4943 }
4944
4945 inline UnicodeString&
setTo(char16_t srcChar)4946 UnicodeString::setTo(char16_t srcChar)
4947 {
4948 unBogus();
4949 return doReplace(0, length(), &srcChar, 0, 1);
4950 }
4951
4952 inline UnicodeString&
setTo(UChar32 srcChar)4953 UnicodeString::setTo(UChar32 srcChar)
4954 {
4955 unBogus();
4956 return replace(0, length(), srcChar);
4957 }
4958
4959 inline UnicodeString&
append(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4960 UnicodeString::append(const UnicodeString& srcText,
4961 int32_t srcStart,
4962 int32_t srcLength)
4963 { return doAppend(srcText, srcStart, srcLength); }
4964
4965 inline UnicodeString&
append(const UnicodeString & srcText)4966 UnicodeString::append(const UnicodeString& srcText)
4967 { return doAppend(srcText, 0, srcText.length()); }
4968
4969 inline UnicodeString&
append(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4970 UnicodeString::append(const char16_t *srcChars,
4971 int32_t srcStart,
4972 int32_t srcLength)
4973 { return doAppend(srcChars, srcStart, srcLength); }
4974
4975 inline UnicodeString&
append(ConstChar16Ptr srcChars,int32_t srcLength)4976 UnicodeString::append(ConstChar16Ptr srcChars,
4977 int32_t srcLength)
4978 { return doAppend(srcChars, 0, srcLength); }
4979
4980 inline UnicodeString&
append(char16_t srcChar)4981 UnicodeString::append(char16_t srcChar)
4982 { return doAppend(&srcChar, 0, 1); }
4983
4984 inline UnicodeString&
4985 UnicodeString::operator+= (char16_t ch)
4986 { return doAppend(&ch, 0, 1); }
4987
4988 inline UnicodeString&
4989 UnicodeString::operator+= (UChar32 ch) {
4990 return append(ch);
4991 }
4992
4993 inline UnicodeString&
4994 UnicodeString::operator+= (const UnicodeString& srcText)
4995 { return doAppend(srcText, 0, srcText.length()); }
4996
4997 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4998 UnicodeString::insert(int32_t start,
4999 const UnicodeString& srcText,
5000 int32_t srcStart,
5001 int32_t srcLength)
5002 { return doReplace(start, 0, srcText, srcStart, srcLength); }
5003
5004 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText)5005 UnicodeString::insert(int32_t start,
5006 const UnicodeString& srcText)
5007 { return doReplace(start, 0, srcText, 0, srcText.length()); }
5008
5009 inline UnicodeString&
insert(int32_t start,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)5010 UnicodeString::insert(int32_t start,
5011 const char16_t *srcChars,
5012 int32_t srcStart,
5013 int32_t srcLength)
5014 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
5015
5016 inline UnicodeString&
insert(int32_t start,ConstChar16Ptr srcChars,int32_t srcLength)5017 UnicodeString::insert(int32_t start,
5018 ConstChar16Ptr srcChars,
5019 int32_t srcLength)
5020 { return doReplace(start, 0, srcChars, 0, srcLength); }
5021
5022 inline UnicodeString&
insert(int32_t start,char16_t srcChar)5023 UnicodeString::insert(int32_t start,
5024 char16_t srcChar)
5025 { return doReplace(start, 0, &srcChar, 0, 1); }
5026
5027 inline UnicodeString&
insert(int32_t start,UChar32 srcChar)5028 UnicodeString::insert(int32_t start,
5029 UChar32 srcChar)
5030 { return replace(start, 0, srcChar); }
5031
5032
5033 inline UnicodeString&
remove()5034 UnicodeString::remove()
5035 {
5036 // remove() of a bogus string makes the string empty and non-bogus
5037 if(isBogus()) {
5038 setToEmpty();
5039 } else {
5040 setZeroLength();
5041 }
5042 return *this;
5043 }
5044
5045 inline UnicodeString&
remove(int32_t start,int32_t _length)5046 UnicodeString::remove(int32_t start,
5047 int32_t _length)
5048 {
5049 if(start <= 0 && _length == INT32_MAX) {
5050 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5051 return remove();
5052 }
5053 return doReplace(start, _length, nullptr, 0, 0);
5054 }
5055
5056 inline UnicodeString&
removeBetween(int32_t start,int32_t limit)5057 UnicodeString::removeBetween(int32_t start,
5058 int32_t limit)
5059 { return doReplace(start, limit - start, nullptr, 0, 0); }
5060
5061 inline UnicodeString &
retainBetween(int32_t start,int32_t limit)5062 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5063 truncate(limit);
5064 return doReplace(0, start, nullptr, 0, 0);
5065 }
5066
5067 inline UBool
truncate(int32_t targetLength)5068 UnicodeString::truncate(int32_t targetLength)
5069 {
5070 if(isBogus() && targetLength == 0) {
5071 // truncate(0) of a bogus string makes the string empty and non-bogus
5072 unBogus();
5073 return false;
5074 } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5075 setLength(targetLength);
5076 return true;
5077 } else {
5078 return false;
5079 }
5080 }
5081
5082 inline UnicodeString&
reverse()5083 UnicodeString::reverse()
5084 { return doReverse(0, length()); }
5085
5086 inline UnicodeString&
reverse(int32_t start,int32_t _length)5087 UnicodeString::reverse(int32_t start,
5088 int32_t _length)
5089 { return doReverse(start, _length); }
5090
5091 U_NAMESPACE_END
5092
5093 #endif /* U_SHOW_CPLUSPLUS_API */
5094
5095 #endif
5096