• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1998-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   09/25/98    stephen     Creation.
15 *   11/11/98    stephen     Changed per 11/9 code review.
16 *   04/20/99    stephen     Overhauled per 4/16 code review.
17 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
18 *                           handleReplaceBetween(); other methods unchanged.
19 *   06/25/01    grhoten     Remove dependency on iostream.
20 ******************************************************************************
21 */
22 
23 #ifndef UNISTR_H
24 #define UNISTR_H
25 
26 /**
27  * \file
28  * \brief C++ API: Unicode String
29  */
30 
31 #include "unicode/utypes.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 
35 #include <cstddef>
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/rep.h"
39 #include "unicode/std_string.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/bytestream.h"
42 
43 struct UConverter;          // unicode/ucnv.h
44 
45 #ifndef USTRING_H
46 /**
47  * \ingroup ustring_ustrlen
48  * @param s Pointer to sequence of UChars.
49  * @return Length of sequence.
50  */
51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52 #endif
53 
54 U_NAMESPACE_BEGIN
55 
56 #if !UCONFIG_NO_BREAK_ITERATION
57 class BreakIterator;        // unicode/brkiter.h
58 #endif
59 class Edits;
60 
61 U_NAMESPACE_END
62 
63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
64 /**
65  * Internal string case mapping function type.
66  * All error checking must be done.
67  * src and dest must not overlap.
68  * @internal
69  */
70 typedef int32_t U_CALLCONV  // declares a function type named UStringCaseMapper (not a pointer type)
71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
72 #if !UCONFIG_NO_BREAK_ITERATION
73                   icu::BreakIterator *iter,  // parameter exists only when UCONFIG_NO_BREAK_ITERATION is 0
74 #endif
75                   char16_t *dest, int32_t destCapacity,  // output buffer; must not overlap src
76                   const char16_t *src, int32_t srcLength,  // input text, not modified through this pointer
77                   icu::Edits *edits,  // NOTE(review): presumably optional (may be nullptr) -- confirm against implementations
78                   UErrorCode &errorCode);  // ICU error code, passed by reference
79 
80 U_NAMESPACE_BEGIN
81 
82 class Locale;               // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable;  // unicode/appendable.h
85 
86 /* The <iostream> include has been moved to unicode/ustream.h */
87 
88 /**
89  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
90  * which constructs a Unicode string from an invariant-character char * string.
91  * About invariant characters see utypes.h.
92  * This constructor has no runtime dependency on conversion code and is
93  * therefore recommended over ones taking a charset name string
94  * (where the empty string "" indicates invariant-character conversion).
95  *
96  * @stable ICU 3.2
97  */
98 #define US_INV icu::UnicodeString::kInvariant
99 
100 /**
101  * \def UNICODE_STRING
102  * Obsolete macro approximating UnicodeString literals.
103  *
104  * Prior to the availability of C++11 and u"UTF-16 string literals",
105  * this macro was provided for portability and efficiency when
106  * initializing UnicodeStrings from literals.
107  *
108  * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
109  * length determination:
110  * \code
111  * UnicodeString str(u"literal");
112  * if (str == u"other literal") { ... }
113  * \endcode
114  *
115  * The string parameter must be a C string literal.
116  * The length of the string, not including the terminating
117  * `NUL`, must be specified as a constant.
118  * @stable ICU 2.0
119  */
120 #if !U_CHAR16_IS_TYPEDEF  // the u-prefixed literal is passed to the constructor as is
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122 #else  // U_CHAR16_IS_TYPEDEF: the u-prefixed literal is first cast to const char16_t *
123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124 #endif  // !U_CHAR16_IS_TYPEDEF
125 
126 /**
127  * Unicode String literals in C++.
128  * Obsolete macro approximating UnicodeString literals.
129  * See UNICODE_STRING.
130  *
131  * The string parameter must be a C string literal.
132  * @stable ICU 2.0
133  * @see UNICODE_STRING
134  */
135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136 
137 /**
138  * \def UNISTR_FROM_CHAR_EXPLICIT
139  * This can be defined to be empty or "explicit".
140  * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
141  * constructors are marked as explicit, preventing their inadvertent use.
142  * @stable ICU 49
143  */
144 #ifndef UNISTR_FROM_CHAR_EXPLICIT
145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146     // Auto-"explicit" in ICU library code.
147 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
148 # else
149     // Empty by default for source code compatibility.
150 #   define UNISTR_FROM_CHAR_EXPLICIT
151 # endif
152 #endif
153 
154 /**
155  * \def UNISTR_FROM_STRING_EXPLICIT
156  * This can be defined to be empty or "explicit".
157  * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
158  * constructors are marked as explicit, preventing their inadvertent use.
159  *
160  * In particular, this helps prevent accidentally depending on ICU conversion code
161  * by passing a string literal into an API with a const UnicodeString & parameter.
162  * @stable ICU 49
163  */
164 #ifndef UNISTR_FROM_STRING_EXPLICIT
165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166     // Auto-"explicit" in ICU library code.
167 #   define UNISTR_FROM_STRING_EXPLICIT explicit
168 # else
169     // Empty by default for source code compatibility.
170 #   define UNISTR_FROM_STRING_EXPLICIT
171 # endif
172 #endif
173 
174 /**
175  * \def UNISTR_OBJECT_SIZE
176  * Desired sizeof(UnicodeString) in bytes.
177  * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
178  * The object size may want to be a multiple of 16 bytes,
179  * which is a common granularity for heap allocation.
180  *
181  * Any space inside the object beyond sizeof(vtable pointer) + 2
182  * is available for storing short strings inside the object.
183  * The bigger the object, the longer a string that can be stored inside the object,
184  * without additional heap allocation.
185  *
186  * Depending on a platform's pointer size, pointer alignment requirements,
187  * and struct padding, the compiler will usually round up sizeof(UnicodeString)
188  * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
189  * to hold the fields for heap-allocated strings.
190  * Such a minimum size also ensures that the object is easily large enough
191  * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
192  *
193  * sizeof(UnicodeString) >= 48 should work for all known platforms.
194  *
195  * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
196  * sizeof(UnicodeString) = 64 would leave space for
197  * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
198  * char16_ts stored inside the object.
199  *
200  * The minimum object size on a 64-bit machine would be
201  * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
202  * and the internal buffer would hold up to 11 char16_ts in that case.
203  *
204  * @see U16_MAX_LENGTH
205  * @stable ICU 56
206  */
207 #ifndef UNISTR_OBJECT_SIZE
208 # define UNISTR_OBJECT_SIZE 64
209 #endif
210 
211 /**
212  * UnicodeString is a string class that stores Unicode characters directly and provides
213  * similar functionality as the Java String and StringBuffer/StringBuilder classes.
214  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
215  *
216  * The UnicodeString equivalent of std::string’s clear() is remove().
217  *
218  * A UnicodeString may "alias" an external array of characters
219  * (that is, point to it, rather than own the array)
220  * whose lifetime must then at least match the lifetime of the aliasing object.
221  * This aliasing may be preserved when returning a UnicodeString by value,
222  * depending on the compiler and the function implementation,
223  * via Return Value Optimization (RVO) or the move assignment operator.
224  * (However, the copy assignment operator does not preserve aliasing.)
225  * For details see the description of storage models at the end of the class API docs
226  * and in the User Guide chapter linked from there.
227  *
228  * The UnicodeString class is not suitable for subclassing.
229  *
230  * For an overview of Unicode strings in C and C++ see the
231  * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
232  *
233  * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
234  * A Unicode character may be stored with either one code unit
235  * (the most common case) or with a matched pair of special code units
236  * ("surrogates"). The data type for code units is char16_t.
237  * For single-character handling, a Unicode character code *point* is a value
238  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
239  *
240  * Indexes and offsets into and lengths of strings always count code units, not code points.
241  * This is the same as with multi-byte char* strings in traditional string handling.
242  * Operations on partial strings typically do not test for code point boundaries.
243  * If necessary, the user needs to take care of such boundaries by testing for the code unit
244  * values or by using functions like
245  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
246  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
247  *
248  * UnicodeString methods are more lenient with regard to input parameter values
249  * than other ICU APIs. In particular:
250  * - If indexes are out of bounds for a UnicodeString object
251  *   (< 0 or > length()) then they are "pinned" to the nearest boundary.
252  * - If the buffer passed to an insert/append/replace operation is owned by the
253  *   target object, e.g., calling str.append(str), an extra copy may take place
254  *   to ensure safety.
255  * - If primitive string pointer values (e.g., const char16_t * or char *)
256  *   for input strings are nullptr, then those input string parameters are treated
257  *   as if they pointed to an empty string.
258  *   However, this is *not* the case for char * parameters for charset names
259  *   or other IDs.
260  * - Most UnicodeString methods do not take a UErrorCode parameter because
261  *   there are usually very few opportunities for failure other than a shortage
262  *   of memory, error codes in low-level C++ string methods would be inconvenient,
263  *   and the error code as the last parameter (ICU convention) would prevent
264  *   the use of default parameter values.
265  *   Instead, such methods set the UnicodeString into a "bogus" state
266  *   (see isBogus()) if an error occurs.
267  *
268  * In string comparisons, two UnicodeString objects that are both "bogus"
269  * compare equal (to be transitive and prevent endless loops in sorting),
270  * and a "bogus" string compares less than any non-"bogus" one.
271  *
272  * Const UnicodeString methods are thread-safe. Multiple threads can use
273  * const methods on the same UnicodeString object simultaneously,
274  * but non-const methods must not be called concurrently (in multiple threads)
275  * with any other (const or non-const) methods.
276  *
277  * Similarly, const UnicodeString & parameters are thread-safe.
278  * One object may be passed in as such a parameter concurrently in multiple threads.
279  * This includes the const UnicodeString & parameters for
280  * copy construction, assignment, and cloning.
281  *
282  * UnicodeString uses several storage methods.
283  * String contents can be stored inside the UnicodeString object itself,
284  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
285  * Most of this is done transparently, but careful aliasing in particular provides
286  * significant performance improvements.
287  * Also, the internal buffer is accessible via special functions.
288  * For details see the
289  * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
290  *
291  * @see utf.h
292  * @see CharacterIterator
293  * @stable ICU 2.0
294  */
295 class U_COMMON_API UnicodeString : public Replaceable
296 {
297 public:
298 
299   /**
300    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
301    * which constructs a Unicode string from an invariant-character char * string.
302    * Use the macro US_INV instead of the full qualification for this value.
303    *
304    * @see US_INV
305    * @stable ICU 3.2
306    */
307   enum EInvariant {
308     /**
309      * @see EInvariant
310      * @stable ICU 3.2
311      */
312     kInvariant
313   };
314 
315   //========================================
316   // Read-only operations
317   //========================================
318 
319   /* Comparison - bitwise only - for international comparison use collation */
320 
321   /**
322    * Equality operator. Performs only bitwise comparison.
323    * @param text The UnicodeString to compare to this one.
324    * @return true if `text` contains the same characters as this one,
325    * false otherwise.
326    * @stable ICU 2.0
327    */
328   inline bool operator== (const UnicodeString& text) const;
329 
330 #ifndef U_HIDE_DRAFT_API
331   /**
332    * Tests this string for bitwise equality with `text`, which is, or is implicitly
333    * convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
334    * A bogus string never compares equal to a string view.
335    *
336    * For performance, a UTF-16 string literal lets the length be determined at compile time:
337    * \code
338    * UnicodeString str = ...;
339    * if (str == u"literal") { ... }
340    * \endcode
341    * @param text The string view to compare to this string.
342    * @return true if `text` contains the same characters as this one, false otherwise.
343    * @draft ICU 76
344    */
345   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
346   inline bool operator==(const S &text) const {
347     const std::u16string_view other(internal::toU16StringView(text));
348     if (isBogus()) { return false; }
349     const uint32_t ownLength = length();  // unsigned, so that comparing with size_t does not warn
350     return ownLength == other.length() && doEquals(other.data(), ownLength);
351   }
352 #endif  // U_HIDE_DRAFT_API
353 
354   /**
355    * Inequality operator. Performs only bitwise comparison.
356    * @param text The UnicodeString to compare to this one.
357    * @return false if `text` contains the same characters as this one,
358    * true otherwise.
359    * @stable ICU 2.0
360    */
361   inline bool operator!= (const UnicodeString& text) const;
362 
363 #ifndef U_HIDE_DRAFT_API
364   /**
365    * Inequality operator. Performs only bitwise comparison with `text`
366    * which is, or which is implicitly convertible to,
367    * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
368    *
369    * For performance, you can use std::u16string_view literals with compile-time
370    * length determination:
371    * \code
372    * #include <string_view>
373    * using namespace std::string_view_literals;
374    * UnicodeString str = ...;
375    * if (str != u"literal"sv) { ... }
376    * \endcode
377    * @param text The string view to compare to this string.
378    * @return false if `text` contains the same characters as this one, true otherwise.
379    * @draft ICU 76
380    */
381   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
382   inline bool operator!=(const S &text) const {
383     return !(this->operator==(text));  // exact negation of operator== for the same argument
384   }
385 #endif  // U_HIDE_DRAFT_API
386 
387   /**
388    * Greater than operator. Performs only bitwise comparison.
389    * @param text The UnicodeString to compare to this one.
390    * @return true if the characters in this are bitwise
391    * greater than the characters in `text`, false otherwise
392    * @stable ICU 2.0
393    */
394   inline UBool operator> (const UnicodeString& text) const;
395 
396   /**
397    * Less than operator. Performs only bitwise comparison.
398    * @param text The UnicodeString to compare to this one.
399    * @return true if the characters in this are bitwise
400    * less than the characters in `text`, false otherwise
401    * @stable ICU 2.0
402    */
403   inline UBool operator< (const UnicodeString& text) const;
404 
405   /**
406    * Greater than or equal operator. Performs only bitwise comparison.
407    * @param text The UnicodeString to compare to this one.
408    * @return true if the characters in this are bitwise
409    * greater than or equal to the characters in `text`, false otherwise
410    * @stable ICU 2.0
411    */
412   inline UBool operator>= (const UnicodeString& text) const;
413 
414   /**
415    * Less than or equal operator. Performs only bitwise comparison.
416    * @param text The UnicodeString to compare to this one.
417    * @return true if the characters in this are bitwise
418    * less than or equal to the characters in `text`, false otherwise
419    * @stable ICU 2.0
420    */
421   inline UBool operator<= (const UnicodeString& text) const;
422 
423   /**
424    * Compare the characters bitwise in this UnicodeString to
425    * the characters in `text`.
426    * @param text The UnicodeString to compare to this one.
427    * @return The result of bitwise character comparison: 0 if this
428    * contains the same characters as `text`, -1 if the characters in
429    * this are bitwise less than the characters in `text`, +1 if the
430    * characters in this are bitwise greater than the characters
431    * in `text`.
432    * @stable ICU 2.0
433    */
434   inline int8_t compare(const UnicodeString& text) const;
435 
436   /**
437    * Compare the characters bitwise in the range
438    * [`start`, `start + length`) with the characters
439    * in the **entire string** `text`.
440    * (The parameters "start" and "length" are not applied to the other text "text".)
441    * @param start the offset at which the compare operation begins
442    * @param length the number of characters in this string to compare.
443    * @param text the other text to be compared against this string.
444    * @return The result of bitwise character comparison: 0 if this
445    * contains the same characters as `text`, -1 if the characters in
446    * this are bitwise less than the characters in `text`, +1 if the
447    * characters in this are bitwise greater than the characters
448    * in `text`.
449    * @stable ICU 2.0
450    */
451   inline int8_t compare(int32_t start,
452          int32_t length,
453          const UnicodeString& text) const;
454 
455   /**
456    * Compare the characters bitwise in the range
457    * [`start`, `start + length`) with the characters
458    * in `srcText` in the range
459    * [`srcStart`, `srcStart + srcLength`).
460    * @param start the offset at which the compare operation begins
461    * @param length the number of characters in this to compare.
462    * @param srcText the text to be compared
463    * @param srcStart the offset into `srcText` to start comparison
464    * @param srcLength the number of characters in `srcText` to compare
465    * @return The result of bitwise character comparison: 0 if this
466    * contains the same characters as `srcText`, -1 if the characters in
467    * this are bitwise less than the characters in `srcText`, +1 if the
468    * characters in this are bitwise greater than the characters
469    * in `srcText`.
470    * @stable ICU 2.0
471    */
472    inline int8_t compare(int32_t start,
473          int32_t length,
474          const UnicodeString& srcText,
475          int32_t srcStart,
476          int32_t srcLength) const;
477 
478   /**
479    * Compare the characters bitwise in this UnicodeString with the first
480    * `srcLength` characters in `srcChars`.
481    * @param srcChars The characters to compare to this UnicodeString.
482    * @param srcLength the number of characters in `srcChars` to compare
483    * @return The result of bitwise character comparison: 0 if this
484    * contains the same characters as `srcChars`, -1 if the characters in
485    * this are bitwise less than the characters in `srcChars`, +1 if the
486    * characters in this are bitwise greater than the characters
487    * in `srcChars`.
488    * @stable ICU 2.0
489    */
490   inline int8_t compare(ConstChar16Ptr srcChars,
491          int32_t srcLength) const;
492 
493   /**
494    * Compare the characters bitwise in the range
495    * [`start`, `start + length`) with the first
496    * `length` characters in `srcChars`.
497    * @param start the offset at which the compare operation begins
498    * @param length the number of characters to compare.
499    * @param srcChars the characters to be compared
500    * @return The result of bitwise character comparison: 0 if this
501    * contains the same characters as `srcChars`, -1 if the characters in
502    * this are bitwise less than the characters in `srcChars`, +1 if the
503    * characters in this are bitwise greater than the characters
504    * in `srcChars`.
505    * @stable ICU 2.0
506    */
507   inline int8_t compare(int32_t start,
508          int32_t length,
509          const char16_t *srcChars) const;
510 
511   /**
512    * Compare the characters bitwise in the range
513    * [`start`, `start + length`) with the characters
514    * in `srcChars` in the range
515    * [`srcStart`, `srcStart + srcLength`).
516    * @param start the offset at which the compare operation begins
517    * @param length the number of characters in this to compare
518    * @param srcChars the characters to be compared
519    * @param srcStart the offset into `srcChars` to start comparison
520    * @param srcLength the number of characters in `srcChars` to compare
521    * @return The result of bitwise character comparison: 0 if this
522    * contains the same characters as `srcChars`, -1 if the characters in
523    * this are bitwise less than the characters in `srcChars`, +1 if the
524    * characters in this are bitwise greater than the characters
525    * in `srcChars`.
526    * @stable ICU 2.0
527    */
528   inline int8_t compare(int32_t start,
529          int32_t length,
530          const char16_t *srcChars,
531          int32_t srcStart,
532          int32_t srcLength) const;
533 
534   /**
535    * Compare the characters bitwise in the range
536    * [`start`, `limit`) with the characters
537    * in `srcText` in the range
538    * [`srcStart`, `srcLimit`).
539    * @param start the offset at which the compare operation begins
540    * @param limit the offset immediately following the compare operation
541    * @param srcText the text to be compared
542    * @param srcStart the offset into `srcText` to start comparison
543    * @param srcLimit the offset into `srcText` to limit comparison
544    * @return The result of bitwise character comparison: 0 if this
545    * contains the same characters as `srcText`, -1 if the characters in
546    * this are bitwise less than the characters in `srcText`, +1 if the
547    * characters in this are bitwise greater than the characters
548    * in `srcText`.
549    * @stable ICU 2.0
550    */
551   inline int8_t compareBetween(int32_t start,
552             int32_t limit,
553             const UnicodeString& srcText,
554             int32_t srcStart,
555             int32_t srcLimit) const;
556 
557   /**
558    * Compare two Unicode strings in code point order.
559    * The result may be different from the results of compare(), operator<, etc.
560    * if supplementary characters are present:
561    *
562    * In UTF-16, supplementary characters (with code points U+10000 and above) are
563    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
564    * which means that they compare as less than some other BMP characters like U+feff.
565    * This function compares Unicode strings in code point order.
566    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
567    *
568    * @param text Another string to compare this one to.
569    * @return a negative/zero/positive integer corresponding to whether
570    * this string is less than/equal to/greater than the second one
571    * in code point order
572    * @stable ICU 2.0
573    */
574   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
575 
576   /**
577    * Compare two Unicode strings in code point order.
578    * The result may be different from the results of compare(), operator<, etc.
579    * if supplementary characters are present:
580    *
581    * In UTF-16, supplementary characters (with code points U+10000 and above) are
582    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
583    * which means that they compare as less than some other BMP characters like U+feff.
584    * This function compares Unicode strings in code point order.
585    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
586    *
587    * @param start The start offset in this string at which the compare operation begins.
588    * @param length The number of code units from this string to compare.
589    * @param srcText Another string to compare this one to.
590    * @return a negative/zero/positive integer corresponding to whether
591    * this string is less than/equal to/greater than the second one
592    * in code point order
593    * @stable ICU 2.0
594    */
595   inline int8_t compareCodePointOrder(int32_t start,
596                                       int32_t length,
597                                       const UnicodeString& srcText) const;
598 
599   /**
600    * Compare two Unicode strings in code point order.
601    * The result may be different from the results of compare(), operator<, etc.
602    * if supplementary characters are present:
603    *
604    * In UTF-16, supplementary characters (with code points U+10000 and above) are
605    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
606    * which means that they compare as less than some other BMP characters like U+feff.
607    * This function compares Unicode strings in code point order.
608    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
609    *
610    * @param start The start offset in this string at which the compare operation begins.
611    * @param length The number of code units from this string to compare.
612    * @param srcText Another string to compare this one to.
613    * @param srcStart The start offset in that string at which the compare operation begins.
614    * @param srcLength The number of code units from that string to compare.
615    * @return a negative/zero/positive integer corresponding to whether
616    * this string is less than/equal to/greater than the second one
617    * in code point order
618    * @stable ICU 2.0
619    */
620    inline int8_t compareCodePointOrder(int32_t start,
621                                        int32_t length,
622                                        const UnicodeString& srcText,
623                                        int32_t srcStart,
624                                        int32_t srcLength) const;
625 
626   /**
627    * Compare two Unicode strings in code point order.
628    * The result may be different from the results of compare(), operator<, etc.
629    * if supplementary characters are present:
630    *
631    * In UTF-16, supplementary characters (with code points U+10000 and above) are
632    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
633    * which means that they compare as less than some other BMP characters like U+feff.
634    * This function compares Unicode strings in code point order.
635    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
636    *
637    * @param srcChars A pointer to another string to compare this one to.
638    * @param srcLength The number of code units from that string to compare.
639    * @return a negative/zero/positive integer corresponding to whether
640    * this string is less than/equal to/greater than the second one
641    * in code point order
642    * @stable ICU 2.0
643    */
644   inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
645                                       int32_t srcLength) const;
646 
647   /**
648    * Compare two Unicode strings in code point order.
649    * The result may be different from the results of compare(), operator<, etc.
650    * if supplementary characters are present:
651    *
652    * In UTF-16, supplementary characters (with code points U+10000 and above) are
653    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
654    * which means that they compare as less than some other BMP characters like U+feff.
655    * This function compares Unicode strings in code point order.
656    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
657    *
658    * @param start The start offset in this string at which the compare operation begins.
659    * @param length The number of code units from this string to compare.
660    * @param srcChars A pointer to another string to compare this one to.
661    * @return a negative/zero/positive integer corresponding to whether
662    * this string is less than/equal to/greater than the second one
663    * in code point order
664    * @stable ICU 2.0
665    */
666   inline int8_t compareCodePointOrder(int32_t start,
667                                       int32_t length,
668                                       const char16_t *srcChars) const;
669 
670   /**
671    * Compare two Unicode strings in code point order.
672    * The result may be different from the results of compare(), operator<, etc.
673    * if supplementary characters are present:
674    *
675    * In UTF-16, supplementary characters (with code points U+10000 and above) are
676    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
677    * which means that they compare as less than some other BMP characters like U+feff.
678    * This function compares Unicode strings in code point order.
679    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
680    *
681    * @param start The start offset in this string at which the compare operation begins.
682    * @param length The number of code units from this string to compare.
683    * @param srcChars A pointer to another string to compare this one to.
684    * @param srcStart The start offset in that string at which the compare operation begins.
685    * @param srcLength The number of code units from that string to compare.
686    * @return a negative/zero/positive integer corresponding to whether
687    * this string is less than/equal to/greater than the second one
688    * in code point order
689    * @stable ICU 2.0
690    */
691   inline int8_t compareCodePointOrder(int32_t start,
692                                       int32_t length,
693                                       const char16_t *srcChars,
694                                       int32_t srcStart,
695                                       int32_t srcLength) const;
696 
697   /**
698    * Compare two Unicode strings in code point order.
699    * The result may be different from the results of compare(), operator<, etc.
700    * if supplementary characters are present:
701    *
702    * In UTF-16, supplementary characters (with code points U+10000 and above) are
703    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
704    * which means that they compare as less than some other BMP characters like U+feff.
705    * This function compares Unicode strings in code point order.
706    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
707    *
708    * @param start The start offset in this string at which the compare operation begins.
709    * @param limit The offset after the last code unit from this string to compare.
710    * @param srcText Another string to compare this one to.
711    * @param srcStart The start offset in that string at which the compare operation begins.
712    * @param srcLimit The offset after the last code unit from that string to compare.
713    * @return a negative/zero/positive integer corresponding to whether
714    * this string is less than/equal to/greater than the second one
715    * in code point order
716    * @stable ICU 2.0
717    */
718   inline int8_t compareCodePointOrderBetween(int32_t start,
719                                              int32_t limit,
720                                              const UnicodeString& srcText,
721                                              int32_t srcStart,
722                                              int32_t srcLimit) const;
723 
724   /**
725    * Compare two strings case-insensitively using full case folding.
726    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
727    *
728    * @param text Another string to compare this one to.
729    * @param options A bit set of options:
730    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
731    *     Comparison in code unit order with default case folding.
732    *
733    *   - U_COMPARE_CODE_POINT_ORDER
734    *     Set to choose code point order instead of code unit order
735    *     (see u_strCompare for details).
736    *
737    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
738    *
739    * @return A negative, zero, or positive integer indicating the comparison result.
740    * @stable ICU 2.0
741    */
742   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
743 
744   /**
745    * Compare two strings case-insensitively using full case folding.
746    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
747    *
748    * @param start The start offset in this string at which the compare operation begins.
749    * @param length The number of code units from this string to compare.
750    * @param srcText Another string to compare this one to.
751    * @param options A bit set of options:
752    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
753    *     Comparison in code unit order with default case folding.
754    *
755    *   - U_COMPARE_CODE_POINT_ORDER
756    *     Set to choose code point order instead of code unit order
757    *     (see u_strCompare for details).
758    *
759    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
760    *
761    * @return A negative, zero, or positive integer indicating the comparison result.
762    * @stable ICU 2.0
763    */
764   inline int8_t caseCompare(int32_t start,
765          int32_t length,
766          const UnicodeString& srcText,
767          uint32_t options) const;
768 
769   /**
770    * Compare two strings case-insensitively using full case folding.
771    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
772    *
773    * @param start The start offset in this string at which the compare operation begins.
774    * @param length The number of code units from this string to compare.
775    * @param srcText Another string to compare this one to.
776    * @param srcStart The start offset in that string at which the compare operation begins.
777    * @param srcLength The number of code units from that string to compare.
778    * @param options A bit set of options:
779    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
780    *     Comparison in code unit order with default case folding.
781    *
782    *   - U_COMPARE_CODE_POINT_ORDER
783    *     Set to choose code point order instead of code unit order
784    *     (see u_strCompare for details).
785    *
786    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
787    *
788    * @return A negative, zero, or positive integer indicating the comparison result.
789    * @stable ICU 2.0
790    */
791   inline int8_t caseCompare(int32_t start,
792          int32_t length,
793          const UnicodeString& srcText,
794          int32_t srcStart,
795          int32_t srcLength,
796          uint32_t options) const;
797 
798   /**
799    * Compare two strings case-insensitively using full case folding.
800    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
801    *
802    * @param srcChars A pointer to another string to compare this one to.
803    * @param srcLength The number of code units from that string to compare.
804    * @param options A bit set of options:
805    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
806    *     Comparison in code unit order with default case folding.
807    *
808    *   - U_COMPARE_CODE_POINT_ORDER
809    *     Set to choose code point order instead of code unit order
810    *     (see u_strCompare for details).
811    *
812    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
813    *
814    * @return A negative, zero, or positive integer indicating the comparison result.
815    * @stable ICU 2.0
816    */
817   inline int8_t caseCompare(ConstChar16Ptr srcChars,
818          int32_t srcLength,
819          uint32_t options) const;
820 
821   /**
822    * Compare two strings case-insensitively using full case folding.
823    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
824    *
825    * @param start The start offset in this string at which the compare operation begins.
826    * @param length The number of code units from this string to compare.
827    * @param srcChars A pointer to another string to compare this one to.
828    * @param options A bit set of options:
829    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
830    *     Comparison in code unit order with default case folding.
831    *
832    *   - U_COMPARE_CODE_POINT_ORDER
833    *     Set to choose code point order instead of code unit order
834    *     (see u_strCompare for details).
835    *
836    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
837    *
838    * @return A negative, zero, or positive integer indicating the comparison result.
839    * @stable ICU 2.0
840    */
841   inline int8_t caseCompare(int32_t start,
842          int32_t length,
843          const char16_t *srcChars,
844          uint32_t options) const;
845 
846   /**
847    * Compare two strings case-insensitively using full case folding.
848    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
849    *
850    * @param start The start offset in this string at which the compare operation begins.
851    * @param length The number of code units from this string to compare.
852    * @param srcChars A pointer to another string to compare this one to.
853    * @param srcStart The start offset in that string at which the compare operation begins.
854    * @param srcLength The number of code units from that string to compare.
855    * @param options A bit set of options:
856    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
857    *     Comparison in code unit order with default case folding.
858    *
859    *   - U_COMPARE_CODE_POINT_ORDER
860    *     Set to choose code point order instead of code unit order
861    *     (see u_strCompare for details).
862    *
863    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
864    *
865    * @return A negative, zero, or positive integer indicating the comparison result.
866    * @stable ICU 2.0
867    */
868   inline int8_t caseCompare(int32_t start,
869          int32_t length,
870          const char16_t *srcChars,
871          int32_t srcStart,
872          int32_t srcLength,
873          uint32_t options) const;
874 
875   /**
876    * Compare two strings case-insensitively using full case folding.
877    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
878    *
879    * @param start The start offset in this string at which the compare operation begins.
880    * @param limit The offset after the last code unit from this string to compare.
881    * @param srcText Another string to compare this one to.
882    * @param srcStart The start offset in that string at which the compare operation begins.
883    * @param srcLimit The offset after the last code unit from that string to compare.
884    * @param options A bit set of options:
885    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
886    *     Comparison in code unit order with default case folding.
887    *
888    *   - U_COMPARE_CODE_POINT_ORDER
889    *     Set to choose code point order instead of code unit order
890    *     (see u_strCompare for details).
891    *
892    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
893    *
894    * @return A negative, zero, or positive integer indicating the comparison result.
895    * @stable ICU 2.0
896    */
897   inline int8_t caseCompareBetween(int32_t start,
898             int32_t limit,
899             const UnicodeString& srcText,
900             int32_t srcStart,
901             int32_t srcLimit,
902             uint32_t options) const;
903 
904   /**
905    * Determine if this starts with the characters in `text`
906    * @param text The text to match.
907    * @return true if this starts with the characters in `text`,
908    * false otherwise
909    * @stable ICU 2.0
910    */
911   inline UBool startsWith(const UnicodeString& text) const;
912 
913   /**
914    * Determine if this starts with the characters in `srcText`
915    * in the range [`srcStart`, `srcStart + srcLength`).
916    * @param srcText The text to match.
917    * @param srcStart the offset into `srcText` to start matching
918    * @param srcLength the number of characters in `srcText` to match
919    * @return true if this starts with the given range of `srcText`,
920    * false otherwise
921    * @stable ICU 2.0
922    */
923   inline UBool startsWith(const UnicodeString& srcText,
924             int32_t srcStart,
925             int32_t srcLength) const;
926 
927   /**
928    * Determine if this starts with the characters in `srcChars`
929    * @param srcChars The characters to match.
930    * @param srcLength the number of characters in `srcChars`
931    * @return true if this starts with the characters in `srcChars`,
932    * false otherwise
933    * @stable ICU 2.0
934    */
935   inline UBool startsWith(ConstChar16Ptr srcChars,
936             int32_t srcLength) const;
937 
938   /**
939    * Determine if this starts with the characters in `srcChars`
940    * in the range [`srcStart`, `srcStart + srcLength`).
941    * @param srcChars The characters to match.
942    * @param srcStart the offset into `srcChars` to start matching
943    * @param srcLength the number of characters in `srcChars` to match
944    * @return true if this starts with the given range of `srcChars`, false otherwise
945    * @stable ICU 2.0
946    */
947   inline UBool startsWith(const char16_t *srcChars,
948             int32_t srcStart,
949             int32_t srcLength) const;
950 
951   /**
952    * Determine if this ends with the characters in `text`
953    * @param text The text to match.
954    * @return true if this ends with the characters in `text`,
955    * false otherwise
956    * @stable ICU 2.0
957    */
958   inline UBool endsWith(const UnicodeString& text) const;
959 
960   /**
961    * Determine if this ends with the characters in `srcText`
962    * in the range [`srcStart`, `srcStart + srcLength`).
963    * @param srcText The text to match.
964    * @param srcStart the offset into `srcText` to start matching
965    * @param srcLength the number of characters in `srcText` to match
966    * @return true if this ends with the given range of `srcText`,
967    * false otherwise
968    * @stable ICU 2.0
969    */
970   inline UBool endsWith(const UnicodeString& srcText,
971           int32_t srcStart,
972           int32_t srcLength) const;
973 
974   /**
975    * Determine if this ends with the characters in `srcChars`
976    * @param srcChars The characters to match.
977    * @param srcLength the number of characters in `srcChars`
978    * @return true if this ends with the characters in `srcChars`,
979    * false otherwise
980    * @stable ICU 2.0
981    */
982   inline UBool endsWith(ConstChar16Ptr srcChars,
983           int32_t srcLength) const;
984 
985   /**
986    * Determine if this ends with the characters in `srcChars`
987    * in the range [`srcStart`, `srcStart + srcLength`).
988    * @param srcChars The characters to match.
989    * @param srcStart the offset into `srcChars` to start matching
990    * @param srcLength the number of characters in `srcChars` to match
991    * @return true if this ends with the given range of `srcChars`,
992    * false otherwise
993    * @stable ICU 2.0
994    */
995   inline UBool endsWith(const char16_t *srcChars,
996           int32_t srcStart,
997           int32_t srcLength) const;
998 
999 
1000   /* Searching - bitwise only */
1001 
1002   /**
1003    * Locate in this the first occurrence of the characters in `text`,
1004    * using bitwise comparison.
1005    * @param text The text to search for.
1006    * @return The offset into this of the start of `text`,
1007    * or -1 if not found.
1008    * @stable ICU 2.0
1009    */
1010   inline int32_t indexOf(const UnicodeString& text) const;
1011 
1012   /**
1013    * Locate in this the first occurrence of the characters in `text`
1014    * starting at offset `start`, using bitwise comparison.
1015    * @param text The text to search for.
1016    * @param start The offset at which searching will start.
1017    * @return The offset into this of the start of `text`,
1018    * or -1 if not found.
1019    * @stable ICU 2.0
1020    */
1021   inline int32_t indexOf(const UnicodeString& text,
1022               int32_t start) const;
1023 
1024   /**
1025    * Locate in this the first occurrence in the range
1026    * [`start`, `start + length`) of the characters
1027    * in `text`, using bitwise comparison.
1028    * @param text The text to search for.
1029    * @param start The offset at which searching will start.
1030    * @param length The number of characters to search
1031    * @return The offset into this of the start of `text`,
1032    * or -1 if not found.
1033    * @stable ICU 2.0
1034    */
1035   inline int32_t indexOf(const UnicodeString& text,
1036               int32_t start,
1037               int32_t length) const;
1038 
1039   /**
1040    * Locate in this the first occurrence in the range
1041    * [`start`, `start + length`) of the characters
1042    * in `srcText` in the range
1043    * [`srcStart`, `srcStart + srcLength`),
1044    * using bitwise comparison.
1045    * @param srcText The text to search for.
1046    * @param srcStart the offset into `srcText` at which
1047    * to start matching
1048    * @param srcLength the number of characters in `srcText` to match
1049    * @param start the offset into this at which to start matching
1050    * @param length the number of characters in this to search
1051    * @return The offset into this of the start of the matched `srcText` range,
1052    * or -1 if not found.
1053    * @stable ICU 2.0
1054    */
1055   inline int32_t indexOf(const UnicodeString& srcText,
1056               int32_t srcStart,
1057               int32_t srcLength,
1058               int32_t start,
1059               int32_t length) const;
1060 
1061   /**
1062    * Locate in this the first occurrence of the characters in
1063    * `srcChars`
1064    * starting at offset `start`, using bitwise comparison.
1065    * @param srcChars The text to search for.
1066    * @param srcLength the number of characters in `srcChars` to match
1067    * @param start the offset into this at which to start matching
1068    * @return The offset into this of the start of `srcChars`,
1069    * or -1 if not found.
1070    * @stable ICU 2.0
1071    */
1072   inline int32_t indexOf(const char16_t *srcChars,
1073               int32_t srcLength,
1074               int32_t start) const;
1075 
1076   /**
1077    * Locate in this the first occurrence in the range
1078    * [`start`, `start + length`) of the characters
1079    * in `srcChars`, using bitwise comparison.
1080    * @param srcChars The text to search for.
1081    * @param srcLength the number of characters in `srcChars`
1082    * @param start The offset at which searching will start.
1083    * @param length The number of characters to search
1084    * @return The offset into this of the start of `srcChars`,
1085    * or -1 if not found.
1086    * @stable ICU 2.0
1087    */
1088   inline int32_t indexOf(ConstChar16Ptr srcChars,
1089               int32_t srcLength,
1090               int32_t start,
1091               int32_t length) const;
1092 
1093   /**
1094    * Locate in this the first occurrence in the range
1095    * [`start`, `start + length`) of the characters
1096    * in `srcChars` in the range
1097    * [`srcStart`, `srcStart + srcLength`),
1098    * using bitwise comparison.
1099    * @param srcChars The text to search for.
1100    * @param srcStart the offset into `srcChars` at which
1101    * to start matching
1102    * @param srcLength the number of characters in `srcChars` to match
1103    * @param start the offset into this at which to start matching
1104    * @param length the number of characters in this to search
1105    * @return The offset into this of the start of the matched `srcChars` range,
1106    * or -1 if not found.
1107    * @stable ICU 2.0
1108    */
1109   int32_t indexOf(const char16_t *srcChars,
1110               int32_t srcStart,
1111               int32_t srcLength,
1112               int32_t start,
1113               int32_t length) const;
1114 
1115   /**
1116    * Locate in this the first occurrence of the BMP code point `c`,
1117    * using bitwise comparison.
1118    * @param c The code unit to search for.
1119    * @return The offset into this of `c`, or -1 if not found.
1120    * @stable ICU 2.0
1121    */
1122   inline int32_t indexOf(char16_t c) const;
1123 
1124   /**
1125    * Locate in this the first occurrence of the code point `c`,
1126    * using bitwise comparison.
1127    *
1128    * @param c The code point to search for.
1129    * @return The offset into this of `c`, or -1 if not found.
1130    * @stable ICU 2.0
1131    */
1132   inline int32_t indexOf(UChar32 c) const;
1133 
1134   /**
1135    * Locate in this the first occurrence of the BMP code point `c`,
1136    * starting at offset `start`, using bitwise comparison.
1137    * @param c The code unit to search for.
1138    * @param start The offset at which searching will start.
1139    * @return The offset into this of `c`, or -1 if not found.
1140    * @stable ICU 2.0
1141    */
1142   inline int32_t indexOf(char16_t c,
1143               int32_t start) const;
1144 
1145   /**
1146    * Locate in this the first occurrence of the code point `c`
1147    * starting at offset `start`, using bitwise comparison.
1148    *
1149    * @param c The code point to search for.
1150    * @param start The offset at which searching will start.
1151    * @return The offset into this of `c`, or -1 if not found.
1152    * @stable ICU 2.0
1153    */
1154   inline int32_t indexOf(UChar32 c,
1155               int32_t start) const;
1156 
1157   /**
1158    * Locate in this the first occurrence of the BMP code point `c`
1159    * in the range [`start`, `start + length`),
1160    * using bitwise comparison.
1161    * @param c The code unit to search for.
1162    * @param start the offset into this at which to start matching
1163    * @param length the number of characters in this to search
1164    * @return The offset into this of `c`, or -1 if not found.
1165    * @stable ICU 2.0
1166    */
1167   inline int32_t indexOf(char16_t c,
1168               int32_t start,
1169               int32_t length) const;
1170 
1171   /**
1172    * Locate in this the first occurrence of the code point `c`
1173    * in the range [`start`, `start + length`),
1174    * using bitwise comparison.
1175    *
1176    * @param c The code point to search for.
1177    * @param start the offset into this at which to start matching
1178    * @param length the number of characters in this to search
1179    * @return The offset into this of `c`, or -1 if not found.
1180    * @stable ICU 2.0
1181    */
1182   inline int32_t indexOf(UChar32 c,
1183               int32_t start,
1184               int32_t length) const;
1185 
1186   /**
1187    * Locate in this the last occurrence of the characters in `text`,
1188    * using bitwise comparison.
1189    * @param text The text to search for.
1190    * @return The offset into this of the start of `text`,
1191    * or -1 if not found.
1192    * @stable ICU 2.0
1193    */
1194   inline int32_t lastIndexOf(const UnicodeString& text) const;
1195 
1196   /**
1197    * Locate in this the last occurrence of the characters in `text`
1198    * starting at offset `start`, using bitwise comparison.
1199    * @param text The text to search for.
1200    * @param start The offset at which searching will start.
1201    * @return The offset into this of the start of `text`,
1202    * or -1 if not found.
1203    * @stable ICU 2.0
1204    */
1205   inline int32_t lastIndexOf(const UnicodeString& text,
1206               int32_t start) const;
1207 
1208   /**
1209    * Locate in this the last occurrence in the range
1210    * [`start`, `start + length`) of the characters
1211    * in `text`, using bitwise comparison.
1212    * @param text The text to search for.
1213    * @param start The offset at which searching will start.
1214    * @param length The number of characters to search
1215    * @return The offset into this of the start of `text`,
1216    * or -1 if not found.
1217    * @stable ICU 2.0
1218    */
1219   inline int32_t lastIndexOf(const UnicodeString& text,
1220               int32_t start,
1221               int32_t length) const;
1222 
1223   /**
1224    * Locate in this the last occurrence in the range
1225    * [`start`, `start + length`) of the characters
1226    * in `srcText` in the range
1227    * [`srcStart`, `srcStart + srcLength`),
1228    * using bitwise comparison.
1229    * @param srcText The text to search for.
1230    * @param srcStart the offset into `srcText` at which
1231    * to start matching
1232    * @param srcLength the number of characters in `srcText` to match
1233    * @param start the offset into this at which to start matching
1234    * @param length the number of characters in this to search
1235    * @return The offset into this of the start of the matched `srcText` range,
1236    * or -1 if not found.
1237    * @stable ICU 2.0
1238    */
1239   inline int32_t lastIndexOf(const UnicodeString& srcText,
1240               int32_t srcStart,
1241               int32_t srcLength,
1242               int32_t start,
1243               int32_t length) const;
1244 
1245   /**
1246    * Locate in this the last occurrence of the characters in `srcChars`
1247    * starting at offset `start`, using bitwise comparison.
1248    * @param srcChars The text to search for.
1249    * @param srcLength the number of characters in `srcChars` to match
1250    * @param start the offset into this at which to start matching
1251    * @return The offset into this of the start of `srcChars`,
1252    * or -1 if not found.
1253    * @stable ICU 2.0
1254    */
1255   inline int32_t lastIndexOf(const char16_t *srcChars,
1256               int32_t srcLength,
1257               int32_t start) const;
1258 
1259   /**
1260    * Locate in this the last occurrence in the range
1261    * [`start`, `start + length`) of the characters
1262    * in `srcChars`, using bitwise comparison.
1263    * @param srcChars The text to search for.
1264    * @param srcLength the number of characters in `srcChars`
1265    * @param start The offset at which searching will start.
1266    * @param length The number of characters to search
1267    * @return The offset into this of the start of `srcChars`,
1268    * or -1 if not found.
1269    * @stable ICU 2.0
1270    */
1271   inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1272               int32_t srcLength,
1273               int32_t start,
1274               int32_t length) const;
1275 
1276   /**
1277    * Locate in this the last occurrence in the range
1278    * [`start`, `start + length`) of the characters
1279    * in `srcChars` in the range
1280    * [`srcStart`, `srcStart + srcLength`),
1281    * using bitwise comparison.
1282    * @param srcChars The text to search for.
1283    * @param srcStart the offset into `srcChars` at which
1284    * to start matching
1285    * @param srcLength the number of characters in `srcChars` to match
1286    * @param start the offset into this at which to start matching
1287    * @param length the number of characters in this to search
1288    * @return The offset into this of the start of the matched `srcChars` range,
1289    * or -1 if not found.
1290    * @stable ICU 2.0
1291    */
1292   int32_t lastIndexOf(const char16_t *srcChars,
1293               int32_t srcStart,
1294               int32_t srcLength,
1295               int32_t start,
1296               int32_t length) const;
1297 
1298   /**
1299    * Locate in this the last occurrence of the BMP code point `c`,
1300    * using bitwise comparison.
1301    * @param c The code unit to search for.
1302    * @return The offset into this of `c`, or -1 if not found.
1303    * @stable ICU 2.0
1304    */
1305   inline int32_t lastIndexOf(char16_t c) const;
1306 
1307   /**
1308    * Locate in this the last occurrence of the code point `c`,
1309    * using bitwise comparison.
1310    *
1311    * @param c The code point to search for.
1312    * @return The offset into this of `c`, or -1 if not found.
1313    * @stable ICU 2.0
1314    */
1315   inline int32_t lastIndexOf(UChar32 c) const;
1316 
1317   /**
1318    * Locate in this the last occurrence of the BMP code point `c`
1319    * starting at offset `start`, using bitwise comparison.
1320    * @param c The code unit to search for.
1321    * @param start The offset at which searching will start.
1322    * @return The offset into this of `c`, or -1 if not found.
1323    * @stable ICU 2.0
1324    */
1325   inline int32_t lastIndexOf(char16_t c,
1326               int32_t start) const;
1327 
1328   /**
1329    * Locate in this the last occurrence of the code point `c`
1330    * starting at offset `start`, using bitwise comparison.
1331    *
1332    * @param c The code point to search for.
1333    * @param start The offset at which searching will start.
1334    * @return The offset into this of `c`, or -1 if not found.
1335    * @stable ICU 2.0
1336    */
1337   inline int32_t lastIndexOf(UChar32 c,
1338               int32_t start) const;
1339 
1340   /**
1341    * Locate in this the last occurrence of the BMP code point `c`
1342    * in the range [`start`, `start + length`),
1343    * using bitwise comparison.
1344    * @param c The code unit to search for.
1345    * @param start the offset into this at which to start matching
1346    * @param length the number of characters in this to search
1347    * @return The offset into this of `c`, or -1 if not found.
1348    * @stable ICU 2.0
1349    */
1350   inline int32_t lastIndexOf(char16_t c,
1351               int32_t start,
1352               int32_t length) const;
1353 
1354   /**
1355    * Locate in this the last occurrence of the code point `c`
1356    * in the range [`start`, `start + length`),
1357    * using bitwise comparison.
1358    *
1359    * @param c The code point to search for.
1360    * @param start the offset into this at which to start matching
1361    * @param length the number of characters in this to search
1362    * @return The offset into this of `c`, or -1 if not found.
1363    * @stable ICU 2.0
1364    */
1365   inline int32_t lastIndexOf(UChar32 c,
1366               int32_t start,
1367               int32_t length) const;
1368 
1369 
1370   /* Character access */
1371 
1372   /**
1373    * Return the code unit at offset `offset`.
1374    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1375    * @param offset a valid offset into the text
1376    * @return the code unit at offset `offset`
1377    *         or 0xffff if the offset is not valid for this string
1378    * @stable ICU 2.0
1379    */
1380   inline char16_t charAt(int32_t offset) const;
1381 
1382   /**
1383    * Return the code unit at offset `offset`.
1384    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1385    * @param offset a valid offset into the text
1386    * @return the code unit at offset `offset`, or 0xffff if the offset is not valid for this string
1387    * @stable ICU 2.0
1388    */
1389   inline char16_t operator[] (int32_t offset) const;
1390 
1391   /**
1392    * Return the code point that contains the code unit
1393    * at offset `offset`.
1394    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1395    * @param offset a valid offset into the text
1396    * that indicates the text offset of any of the code units
1397    * that will be assembled into a code point (21-bit value) and returned
1398    * @return the code point of text at `offset`
1399    *         or 0xffff if the offset is not valid for this string
1400    * @stable ICU 2.0
1401    */
1402   UChar32 char32At(int32_t offset) const;
1403 
1404   /**
1405    * Adjust a random-access offset so that
1406    * it points to the beginning of a Unicode character.
1407    * The offset that is passed in points to
1408    * any code unit of a code point,
1409    * while the returned offset will point to the first code unit
1410    * of the same code point.
1411    * In UTF-16, if the input offset points to a second surrogate
1412    * of a surrogate pair, then the returned offset will point
1413    * to the first surrogate.
1414    * @param offset a valid offset into one code point of the text
1415    * @return offset of the first code unit of the same code point
1416    * @see U16_SET_CP_START
1417    * @stable ICU 2.0
1418    */
1419   int32_t getChar32Start(int32_t offset) const;
1420 
1421   /**
1422    * Adjust a random-access offset so that
1423    * it points behind a Unicode character.
1424    * The offset that is passed in points behind
1425    * any code unit of a code point,
1426    * while the returned offset will point behind the last code unit
1427    * of the same code point.
1428    * In UTF-16, if the input offset points behind the first surrogate
1429    * (i.e., to the second surrogate)
1430    * of a surrogate pair, then the returned offset will point
1431    * behind the second surrogate (i.e., to the code unit following the pair).
1432    * @param offset a valid offset after any code unit of a code point of the text
1433    * @return offset of the first code unit after the same code point
1434    * @see U16_SET_CP_LIMIT
1435    * @stable ICU 2.0
1436    */
1437   int32_t getChar32Limit(int32_t offset) const;
1438 
1439   /**
1440    * Move the code unit index along the string by delta code points.
1441    * Interpret the input index as a code unit-based offset into the string,
1442    * move the index forward or backward by delta code points, and
1443    * return the resulting index.
1444    * The input index should point to the first code unit of a code point,
1445    * if there is more than one.
1446    *
1447    * Both input and output indexes are code unit-based as for all
1448    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1449    * If delta<0 then the index is moved backward (toward the start of the string).
1450    * If delta>0 then the index is moved forward (toward the end of the string).
1451    *
1452    * This behaves like CharacterIterator::move32(delta, kCurrent).
1453    *
1454    * Behavior for out-of-bounds indexes:
1455    * `moveIndex32` pins the input index to 0..length(), i.e.,
1456    * if the input index<0 then it is pinned to 0;
1457    * if it is index>length() then it is pinned to length().
1458    * Afterwards, the index is moved by `delta` code points
1459    * forward or backward,
1460    * but no further backward than to 0 and no further forward than to length().
1461    * The resulting index return value will be in between 0 and length(), inclusively.
1462    *
1463    * Examples:
1464    * \code
1465    *     // s has code points 'a' U+10000 'b' U+10ffff U+2029
1466    *     UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
1467    *
1468    *     // initial index: position of U+10000
1469    *     int32_t index=1;
1470    *
1471    *     // the following examples will all result in index==4, position of U+10ffff
1472    *
1473    *     // skip 2 code points from some position in the string
1474    *     index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1475    *
1476    *     // go to the 3rd code point from the start of s (0-based)
1477    *     index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1478    *
1479    *     // go to the next-to-last code point of s
1480    *     index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1481    * \endcode
1482    *
1483    * @param index input code unit index
1484    * @param delta (signed) code point count to move the index forward or backward
1485    *        in the string
1486    * @return the resulting code unit index
1487    * @stable ICU 2.0
1488    */
1489   int32_t moveIndex32(int32_t index, int32_t delta) const;
1490 
1491   /* Substring extraction */
1492 
1493   /**
1494    * Copy the characters in the range
1495    * [`start`, `start + length`) into the array `dst`,
1496    * beginning at `dstStart`.
1497    * If the string aliases to `dst` itself as an external buffer,
1498    * then extract() will not copy the contents.
1499    *
1500    * @param start offset of first character which will be copied into the array
1501    * @param length the number of characters to extract
1502    * @param dst array in which to copy characters.  The length of `dst`
1503    * must be at least (`dstStart + length`).
1504    * @param dstStart the offset in `dst` where the first character
1505    * will be extracted
1506    * @stable ICU 2.0
1507    */
1508   inline void extract(int32_t start,
1509            int32_t length,
1510            Char16Ptr dst,
1511            int32_t dstStart = 0) const;
1512 
1513   /**
1514    * Copy the contents of the string into dest.
1515    * This is a convenience function that
1516    * checks if there is enough space in dest,
1517    * extracts the entire string if possible,
1518    * and NUL-terminates dest if possible.
1519    *
1520    * If the string fits into dest but cannot be NUL-terminated
1521    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1522    * If the string itself does not fit into dest
1523    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1524    *
1525    * If the string aliases to `dest` itself as an external buffer,
1526    * then extract() will not copy the contents.
1527    *
1528    * @param dest Destination string buffer.
1529    * @param destCapacity Number of char16_ts available at dest.
1530    * @param errorCode ICU error code.
1531    * @return length()
1532    * @stable ICU 2.0
1533    */
1534   int32_t
1535   extract(Char16Ptr dest, int32_t destCapacity,
1536           UErrorCode &errorCode) const;
1537 
1538   /**
1539    * Copy the characters in the range
1540    * [`start`, `start + length`) into the  UnicodeString
1541    * `target`.
1542    * @param start offset of first character which will be copied
1543    * @param length the number of characters to extract
1544    * @param target UnicodeString into which to copy characters.
1545    * @stable ICU 2.0
1546    */
1547   inline void extract(int32_t start,
1548            int32_t length,
1549            UnicodeString& target) const;
1550 
1551   /**
1552    * Copy the characters in the range [`start`, `limit`)
1553    * into the array `dst`, beginning at `dstStart`.
1554    * @param start offset of first character which will be copied into the array
1555    * @param limit offset immediately following the last character to be copied
1556    * @param dst array in which to copy characters.  The length of `dst`
1557    * must be at least (`dstStart + (limit - start)`).
1558    * @param dstStart the offset in `dst` where the first character
1559    * will be extracted
1560    * @stable ICU 2.0
1561    */
1562   inline void extractBetween(int32_t start,
1563               int32_t limit,
1564               char16_t *dst,
1565               int32_t dstStart = 0) const;
1566 
1567   /**
1568    * Copy the characters in the range [`start`, `limit`)
1569    * into the UnicodeString `target`.  Replaceable API.
1570    * @param start offset of first character which will be copied
1571    * @param limit offset immediately following the last character to be copied
1572    * @param target UnicodeString into which to copy characters.
1573    * @stable ICU 2.0
1574    */
1575   virtual void extractBetween(int32_t start,
1576               int32_t limit,
1577               UnicodeString& target) const override;
1578 
1579   /**
1580    * Copy the characters in the range
1581    * [`start`, `start + startLength`) into an array of characters.
1582    * All characters must be invariant (see utypes.h).
1583    * Use US_INV as the last, signature-distinguishing parameter.
1584    *
1585    * This function does not write any more than `targetCapacity`
1586    * characters but returns the length of the entire output string
1587    * so that one can allocate a larger buffer and call the function again
1588    * if necessary.
1589    * The output string is NUL-terminated if possible.
1590    *
1591    * @param start offset of first character which will be copied
1592    * @param startLength the number of characters to extract
1593    * @param target the target buffer for extraction, can be nullptr
1594    *               if targetCapacity is 0
1595    * @param targetCapacity the length of the target buffer
1596    * @param inv Signature-distinguishing parameter, use US_INV.
1597    * @return the output string length, not including the terminating NUL
1598    * @stable ICU 3.2
1599    */
1600   int32_t extract(int32_t start,
1601            int32_t startLength,
1602            char *target,
1603            int32_t targetCapacity,
1604            enum EInvariant inv) const;
1605 
1606 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1607 
1608   /**
1609    * Copy the characters in the range
1610    * [`start`, `start + startLength`) into an array of characters
1611    * in the platform's default codepage.
1612    * This function does not write any more than `targetLength`
1613    * characters but returns the length of the entire output string
1614    * so that one can allocate a larger buffer and call the function again
1615    * if necessary.
1616    * The output string is NUL-terminated if possible.
1617    *
1618    * @param start offset of first character which will be copied
1619    * @param startLength the number of characters to extract
1620    * @param target the target buffer for extraction
1621    * @param targetLength the length of the target buffer
1622    * If `target` is nullptr, then the number of bytes required for
1623    * `target` is returned.
1624    * @return the output string length, not including the terminating NUL
1625    * @stable ICU 2.0
1626    */
1627   int32_t extract(int32_t start,
1628            int32_t startLength,
1629            char *target,
1630            uint32_t targetLength) const;
1631 
1632 #endif
1633 
1634 #if !UCONFIG_NO_CONVERSION
1635 
1636   /**
1637    * Copy the characters in the range
1638    * [`start`, `start + startLength`) into an array of characters
1639    * in a specified codepage.
1640    * The output string is NUL-terminated.
1641    *
1642    * Recommendation: For invariant-character strings use
1643    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1644    * because it avoids object code dependencies of UnicodeString on
1645    * the conversion code.
1646    *
1647    * @param start offset of first character which will be copied
1648    * @param startLength the number of characters to extract
1649    * @param target the target buffer for extraction
1650    * @param codepage the desired codepage for the characters.  0 has
1651    * the special meaning of the default codepage.
1652    * If `codepage` is an empty string (`""`),
1653    * then a simple conversion is performed on the codepage-invariant
1654    * subset ("invariant characters") of the platform encoding. See utypes.h.
1655    * If `target` is nullptr, then the number of bytes required for
1656    * `target` is returned. It is assumed that the target is big enough
1657    * to fit all of the characters.
1658    * @return the output string length, not including the terminating NUL
1659    * @stable ICU 2.0
1660    */
1661   inline int32_t extract(int32_t start,
1662                          int32_t startLength,
1663                          char* target,
1664                          const char* codepage = nullptr) const;
1665 
1666   /**
1667    * Copy the characters in the range
1668    * [`start`, `start + startLength`) into an array of characters
1669    * in a specified codepage.
1670    * This function does not write any more than `targetLength`
1671    * characters but returns the length of the entire output string
1672    * so that one can allocate a larger buffer and call the function again
1673    * if necessary.
1674    * The output string is NUL-terminated if possible.
1675    *
1676    * Recommendation: For invariant-character strings use
1677    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1678    * because it avoids object code dependencies of UnicodeString on
1679    * the conversion code.
1680    *
1681    * @param start offset of first character which will be copied
1682    * @param startLength the number of characters to extract
1683    * @param target the target buffer for extraction
1684    * @param targetLength the length of the target buffer
1685    * @param codepage the desired codepage for the characters.  0 has
1686    * the special meaning of the default codepage.
1687    * If `codepage` is an empty string (`""`),
1688    * then a simple conversion is performed on the codepage-invariant
1689    * subset ("invariant characters") of the platform encoding. See utypes.h.
1690    * If `target` is nullptr, then the number of bytes required for
1691    * `target` is returned.
1692    * @return the output string length, not including the terminating NUL
1693    * @stable ICU 2.0
1694    */
1695   int32_t extract(int32_t start,
1696            int32_t startLength,
1697            char *target,
1698            uint32_t targetLength,
1699            const char *codepage) const;
1700 
1701   /**
1702    * Convert the UnicodeString into a codepage string using an existing UConverter.
1703    * The output string is NUL-terminated if possible.
1704    *
1705    * This function avoids the overhead of opening and closing a converter if
1706    * multiple strings are extracted.
1707    *
1708    * @param dest destination string buffer, can be nullptr if destCapacity==0
1709    * @param destCapacity the number of chars available at dest
1710    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1711    *        or nullptr for the default converter
1712    * @param errorCode normal ICU error code
1713    * @return the length of the output string, not counting the terminating NUL;
1714    *         if the length is greater than destCapacity, then the string will not fit
1715    *         and a buffer of the indicated length would need to be passed in
1716    * @stable ICU 2.0
1717    */
1718   int32_t extract(char *dest, int32_t destCapacity,
1719                   UConverter *cnv,
1720                   UErrorCode &errorCode) const;
1721 
1722 #endif
1723 
1724   /**
1725    * Create a temporary substring for the specified range.
1726    * Unlike the substring constructor and setTo() functions,
1727    * the object returned here will be a read-only alias (using getBuffer())
1728    * rather than copying the text.
1729    * As a result, this substring operation is much faster but requires
1730    * that the original string not be modified or deleted during the lifetime
1731    * of the returned substring object.
1732    * @param start offset of the first character visible in the substring
1733    * @param length length of the substring
1734    * @return a read-only alias UnicodeString object for the substring
1735    * @stable ICU 4.4
1736    */
1737   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1738 
1739   /**
1740    * Create a temporary substring for the specified range.
1741    * Same as tempSubString(start, length) except that the substring range
1742    * is specified as a (start, limit) pair (with an exclusive limit index)
1743    * rather than a (start, length) pair.
1744    * @param start offset of the first character visible in the substring
1745    * @param limit offset immediately following the last character visible in the substring
1746    * @return a read-only alias UnicodeString object for the substring
1747    * @stable ICU 4.4
1748    */
1749   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1750 
1751   /**
1752    * Convert the UnicodeString to UTF-8 and write the result
1753    * to a ByteSink. This is called by toUTF8String().
1754    * Unpaired surrogates are replaced with U+FFFD.
1755    * Calls u_strToUTF8WithSub().
1756    *
1757    * @param sink A ByteSink to which the UTF-8 version of the string is written.
1758    *             sink.Flush() is called at the end.
1759    * @stable ICU 4.2
1760    * @see toUTF8String
1761    */
1762   void toUTF8(ByteSink &sink) const;
1763 
1764   /**
1765    * Convert the UnicodeString to UTF-8 and append the result
1766    * to a standard string.
1767    * Unpaired surrogates are replaced with U+FFFD.
1768    * Calls toUTF8().
1769    *
1770    * @param result A standard string (or a compatible object)
1771    *        to which the UTF-8 version of the string is appended.
1772    * @return The string object.
1773    * @stable ICU 4.2
1774    * @see toUTF8
1775    */
1776   template<typename StringClass>
toUTF8String(StringClass & result)1777   StringClass &toUTF8String(StringClass &result) const {
1778     StringByteSink<StringClass> sbs(&result, length());  // sink built over the caller's string; NOTE(review): length() is presumably an initial capacity hint -- confirm in unicode/bytestream.h
1779     toUTF8(sbs);  // performs the UTF-8 conversion into the sink (documented to call sink.Flush() at the end)
1780     return result;  // same reference that was passed in
1781   }
1782 
1783   /**
1784    * Convert the UnicodeString to UTF-32.
1785    * Unpaired surrogates are replaced with U+FFFD.
1786    * Calls u_strToUTF32WithSub().
1787    *
1788    * @param utf32 destination string buffer, can be nullptr if capacity==0
1789    * @param capacity the number of UChar32s available at utf32
1790    * @param errorCode Standard ICU error code. Its input value must
1791    *                  pass the U_SUCCESS() test, or else the function returns
1792    *                  immediately. Check for U_FAILURE() on output or use with
1793    *                  function chaining. (See User Guide for details.)
1794    * @return The length of the UTF-32 string.
1795    * @see fromUTF32
1796    * @stable ICU 4.2
1797    */
1798   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1799 
1800   /* Length operations */
1801 
1802   /**
1803    * Return the length of the UnicodeString object.
1804    * The length is the number of char16_t code units that are in the UnicodeString.
1805    * If you want the number of code points, please use countChar32().
1806    * @return the length of the UnicodeString object
1807    * @see countChar32
1808    * @stable ICU 2.0
1809    */
1810   inline int32_t length() const;
1811 
1812   /**
1813    * Count Unicode code points in the length char16_t code units of the string.
1814    * A code point may occupy either one or two char16_t code units.
1815    * Counting code points involves reading all code units.
1816    *
1817    * This function is basically the inverse of moveIndex32().
1818    *
1819    * @param start the index of the first code unit to check
1820    * @param length the number of char16_t code units to check
1821    * @return the number of code points in the specified code units
1822    * @see length
1823    * @stable ICU 2.0
1824    */
1825   int32_t
1826   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1827 
1828   /**
1829    * Check if the length char16_t code units of the string
1830    * contain more Unicode code points than a certain number.
1831    * This is more efficient than counting all code points in this part of the string
1832    * and comparing that number with a threshold.
1833    * This function may not need to scan the string at all if the length
1834    * falls within a certain range, and
1835    * never needs to count more than 'number+1' code points.
1836    * Logically equivalent to (countChar32(start, length)>number).
1837    * A Unicode code point may occupy either one or two char16_t code units.
1838    *
1839    * @param start the index of the first code unit to check (0 for the entire string)
1840    * @param length the number of char16_t code units to check
1841    *               (use INT32_MAX for the entire string; remember that start/length
1842    *                values are pinned)
1843    * @param number The number of code points in the (sub)string is compared against
1844    *               the 'number' parameter.
1845    * @return Boolean value for whether the string contains more Unicode code points
1846    *         than 'number'. Same as (u_countChar32(s, length)>number).
1847    * @see countChar32
1848    * @see u_strHasMoreChar32Than
1849    * @stable ICU 2.4
1850    */
1851   UBool
1852   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1853 
1854   /**
1855    * Determine if this string is empty.
1856    * @return true if this string contains 0 characters, false otherwise.
1857    * @stable ICU 2.0
1858    */
1859   inline UBool isEmpty() const;
1860 
1861   /**
1862    * Return the capacity of the internal buffer of the UnicodeString object.
1863    * This is useful together with the getBuffer functions.
1864    * See there for details.
1865    *
1866    * @return the number of char16_ts available in the internal buffer
1867    * @see getBuffer
1868    * @stable ICU 2.0
1869    */
1870   inline int32_t getCapacity() const;
1871 
1872   /* Other operations */
1873 
1874   /**
1875    * Generate a hash code for this object.
1876    * @return The hash code of this UnicodeString.
1877    * @stable ICU 2.0
1878    */
1879   inline int32_t hashCode() const;
1880 
1881   /**
1882    * Determine if this object contains a valid string.
1883    * A bogus string has no value. It is different from an empty string,
1884    * although in both cases isEmpty() returns true and length() returns 0.
1885    * setToBogus() and isBogus() can be used to indicate that no string value is available.
1886    * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
1887    * length() returns 0.
1888    *
1889    * @return true if the string is bogus/invalid, false otherwise
1890    * @see setToBogus()
1891    * @stable ICU 2.0
1892    */
1893   inline UBool isBogus() const;
1894 
1895   //========================================
1896   // Write operations
1897   //========================================
1898 
1899   /* Assignment operations */
1900 
1901   /**
1902    * Assignment operator.  Replace the characters in this UnicodeString
1903    * with the characters from `srcText`.
1904    *
1905    * Starting with ICU 2.4, the assignment operator and the copy constructor
1906    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1907    * By contrast, the fastCopyFrom() function implements the old,
1908    * more efficient but less safe behavior
1909    * of making this string also a readonly alias to the same buffer.
1910    *
1911    * If the source object has an "open" buffer from getBuffer(minCapacity),
1912    * then the copy is an empty string.
1913    *
1914    * @param srcText The text containing the characters to replace
1915    * @return a reference to this
1916    * @stable ICU 2.0
1917    * @see fastCopyFrom
1918    */
1919   UnicodeString &operator=(const UnicodeString &srcText);
1920 
1921   /**
1922    * Almost the same as the assignment operator.
1923    * Replace the characters in this UnicodeString
1924    * with the characters from `srcText`.
1925    *
1926    * This function works the same as the assignment operator
1927    * for all strings except for ones that are readonly aliases.
1928    *
1929    * Starting with ICU 2.4, the assignment operator and the copy constructor
1930    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1931    * This function implements the old, more efficient but less safe behavior
1932    * of making this string also a readonly alias to the same buffer.
1933    *
1934    * The fastCopyFrom function must be used only if it is known that the lifetime of
1935    * this UnicodeString does not exceed the lifetime of the aliased buffer
1936    * including its contents, for example for strings from resource bundles
1937    * or aliases to string constants.
1938    *
1939    * If the source object has an "open" buffer from getBuffer(minCapacity),
1940    * then the copy is an empty string.
1941    *
1942    * @param src The text containing the characters to replace.
1943    * @return a reference to this
1944    * @stable ICU 2.4
1945    */
1946   UnicodeString &fastCopyFrom(const UnicodeString &src);
1947 
1948 #ifndef U_HIDE_DRAFT_API
1949   /**
1950    * Assignment operator. Replaces the characters in this UnicodeString
1951    * with a copy of the characters from the `src`
1952    * which is, or which is implicitly convertible to,
1953    * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
1954    *
1955    * @param src The string view containing the characters to copy.
1956    * @return a reference to this
1957    * @draft ICU 76
1958    */
1959   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>  // overload enabled only for S satisfying ConvertibleToU16StringView
1960   inline UnicodeString &operator=(const S &src) {
1961     unBogus();  // NOTE(review): presumably clears a bogus state before assigning (operator=() is documented as bogus-clearing) -- confirm
1962     return doReplace(0, length(), internal::toU16StringView(src));  // passes the full current range (0, length()) so the entire content is replaced by the view of src
1963   }
1964 #endif  // U_HIDE_DRAFT_API
1965 
1966   /**
1967    * Move assignment operator; might leave src in bogus state.
1968    * This string will have the same contents and state that the source string had.
1969    * The behavior is undefined if *this and src are the same object.
1970    * @param src source string
1971    * @return *this
1972    * @stable ICU 56
1973    */
1974   UnicodeString &operator=(UnicodeString &&src) noexcept;
1975 
1976   /**
1977    * Swap strings.
1978    * @param other other string
1979    * @stable ICU 56
1980    */
1981   void swap(UnicodeString &other) noexcept;
1982 
1983   /**
1984    * Non-member UnicodeString swap function.
1985    * @param s1 will get s2's contents and state
1986    * @param s2 will get s1's contents and state
1987    * @stable ICU 56
1988    */
1989   friend inline void U_EXPORT2  // defined in-class as a friend; callable as non-member swap(s1, s2)
swap(UnicodeString & s1,UnicodeString & s2)1990   swap(UnicodeString &s1, UnicodeString &s2) noexcept {
1991     s1.swap(s2);  // forwards to the member swap(UnicodeString &)
1992   }
1993 
1994   /**
1995    * Assignment operator.  Replace the characters in this UnicodeString
1996    * with the code unit `ch`.
1997    * @param ch the code unit to replace
1998    * @return a reference to this
1999    * @stable ICU 2.0
2000    */
2001   inline UnicodeString& operator= (char16_t ch);
2002 
2003   /**
2004    * Assignment operator.  Replace the characters in this UnicodeString
2005    * with the code point `ch`.
2006    * @param ch the code point to replace
2007    * @return a reference to this
2008    * @stable ICU 2.0
2009    */
2010   inline UnicodeString& operator= (UChar32 ch);
2011 
2012   /**
2013    * Set the text in the UnicodeString object to the characters
2014    * in `srcText` in the range
2015    * [`srcStart`, `srcText.length()`).
2016    * `srcText` is not modified.
2017    * @param srcText the source for the new characters
2018    * @param srcStart the offset into `srcText` where new characters
2019    * will be obtained
2020    * @return a reference to this
2021    * @stable ICU 2.2
2022    */
2023   inline UnicodeString& setTo(const UnicodeString& srcText,
2024                int32_t srcStart);
2025 
2026   /**
2027    * Set the text in the UnicodeString object to the characters
2028    * in `srcText` in the range
2029    * [`srcStart`, `srcStart + srcLength`).
2030    * `srcText` is not modified.
2031    * @param srcText the source for the new characters
2032    * @param srcStart the offset into `srcText` where new characters
2033    * will be obtained
2034    * @param srcLength the number of characters in `srcText` in the
2035    * replace string.
2036    * @return a reference to this
2037    * @stable ICU 2.0
2038    */
2039   inline UnicodeString& setTo(const UnicodeString& srcText,
2040                int32_t srcStart,
2041                int32_t srcLength);
2042 
2043   /**
2044    * Set the text in the UnicodeString object to the characters in
2045    * `srcText`.
2046    * `srcText` is not modified.
2047    * @param srcText the source for the new characters
2048    * @return a reference to this
2049    * @stable ICU 2.0
2050    */
2051   inline UnicodeString& setTo(const UnicodeString& srcText);
2052 
2053   /**
2054    * Set the characters in the UnicodeString object to the characters
2055    * in `srcChars`. `srcChars` is not modified.
2056    * @param srcChars the source for the new characters
2057    * @param srcLength the number of Unicode characters in srcChars.
2058    * @return a reference to this
2059    * @stable ICU 2.0
2060    */
2061   inline UnicodeString& setTo(const char16_t *srcChars,
2062                int32_t srcLength);
2063 
2064   /**
2065    * Set the characters in the UnicodeString object to the code unit
2066    * `srcChar`.
2067    * @param srcChar the code unit which becomes the UnicodeString's character
2068    * content
2069    * @return a reference to this
2070    * @stable ICU 2.0
2071    */
2072   inline UnicodeString& setTo(char16_t srcChar);
2073 
2074   /**
2075    * Set the characters in the UnicodeString object to the code point
2076    * `srcChar`.
2077    * @param srcChar the code point which becomes the UnicodeString's character
2078    * content
2079    * @return a reference to this
2080    * @stable ICU 2.0
2081    */
2082   inline UnicodeString& setTo(UChar32 srcChar);
2083 
2084   /**
2085    * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2086    * The text will be used for the UnicodeString object, but
2087    * it will not be released when the UnicodeString is destroyed.
2088    * This has copy-on-write semantics:
2089    * When the string is modified, then the buffer is first copied into
2090    * newly allocated memory.
2091    * The aliased buffer is never modified.
2092    *
2093    * In an assignment to another UnicodeString, when using the copy constructor
2094    * or the assignment operator, the text will be copied.
2095    * When using fastCopyFrom(), the text will be aliased again,
2096    * so that both strings then alias the same readonly-text.
2097    *
2098    * @param isTerminated specifies if `text` is `NUL`-terminated.
2099    *                     This must be true if `textLength==-1`.
2100    * @param text The characters to alias for the UnicodeString.
2101    * @param textLength The number of Unicode characters in `text` to alias.
2102    *                   If -1, then this function will determine the length
2103    *                   by calling `u_strlen()`.
2104    * @return a reference to this
2105    * @stable ICU 2.0
2106    */
2107   UnicodeString &setTo(UBool isTerminated,
2108                        ConstChar16Ptr text,
2109                        int32_t textLength);
2110 
2111   /**
2112    * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2113    * The text will be used for the UnicodeString object, but
2114    * it will not be released when the UnicodeString is destroyed.
2115    * This has write-through semantics:
2116    * For as long as the capacity of the buffer is sufficient, write operations
2117    * will directly affect the buffer. When more capacity is necessary, then
2118    * a new buffer will be allocated and the contents copied as with regularly
2119    * constructed strings.
2120    * In an assignment to another UnicodeString, the buffer will be copied.
2121    * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2122    * as the string buffer itself and will in this case not copy the contents.
2123    *
2124    * @param buffer The characters to alias for the UnicodeString.
2125    * @param buffLength The number of Unicode characters in `buffer` to alias.
2126    * @param buffCapacity The size of `buffer` in char16_ts.
2127    * @return a reference to this
2128    * @stable ICU 2.0
2129    */
2130   UnicodeString &setTo(char16_t *buffer,
2131                        int32_t buffLength,
2132                        int32_t buffCapacity);
2133 
2134   /**
2135    * Make this UnicodeString object invalid.
2136    * The string will test true with isBogus().
2137    *
2138    * A bogus string has no value. It is different from an empty string.
2139    * It can be used to indicate that no string value is available.
2140    * getBuffer() and getTerminatedBuffer() return nullptr, and
2141    * length() returns 0.
2142    *
2143    * This utility function is used throughout the UnicodeString
2144    * implementation to indicate that a UnicodeString operation failed,
2145    * and may be used in other functions,
2146    * especially but not exclusively when such functions do not
2147    * take a UErrorCode for simplicity.
2148    *
2149    * The following methods, and no others, will clear a string object's bogus flag:
2150    * - remove()
2151    * - remove(0, INT32_MAX)
2152    * - truncate(0)
2153    * - operator=() (assignment operator)
2154    * - setTo(...)
2155    *
2156    * The simplest way to turn a bogus string into an empty one
2157    * is to use the remove() function.
2158    * Examples for other functions that are equivalent to "set to empty string":
2159    * \code
2160    * if(s.isBogus()) {
2161    *   s.remove();           // set to an empty string (remove all), or
2162    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2163    *   s.truncate(0);        // set to an empty string (complete truncation), or
2164    *   s=UnicodeString();    // assign an empty string, or
2165    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2166    *   s.setTo(u"", 0);      // set to an empty C Unicode string
2167    * }
2168    * \endcode
2169    *
2170    * @see isBogus()
2171    * @stable ICU 2.0
2172    */
2173   void setToBogus();
2174 
2175   /**
2176    * Set the character at the specified offset to the specified character.
2177    * @param offset A valid offset into the text of the character to set
2178    * @param ch The new character
2179    * @return A reference to this
2180    * @stable ICU 2.0
2181    */
2182   UnicodeString& setCharAt(int32_t offset,
2183                char16_t ch);
2184 
2185 
2186   /* Append operations */
2187 
2188   /**
2189    * Append operator. Append the code unit `ch` to the UnicodeString
2190    * object.
2191    * @param ch the code unit to be appended
2192    * @return a reference to this
2193    * @stable ICU 2.0
2194    */
2195  inline  UnicodeString& operator+= (char16_t ch);
2196 
2197   /**
2198    * Append operator. Append the code point `ch` to the UnicodeString
2199    * object.
2200    * @param ch the code point to be appended
2201    * @return a reference to this
2202    * @stable ICU 2.0
2203    */
2204  inline  UnicodeString& operator+= (UChar32 ch);
2205 
2206   /**
2207    * Append operator. Append the characters in `srcText` to the
2208    * UnicodeString object. `srcText` is not modified.
2209    * @param srcText the source for the new characters
2210    * @return a reference to this
2211    * @stable ICU 2.0
2212    */
2213   inline UnicodeString& operator+= (const UnicodeString& srcText);
2214 
2215 #ifndef U_HIDE_DRAFT_API
2216   /**
2217    * Append operator. Appends the characters in `src` to the UnicodeString object.
2218    * `src` is, or is implicitly convertible to, a std::u16string_view
2219    * or (if U_SIZEOF_WCHAR_T==2) a std::wstring_view; the template is enabled
2220    * only when ConvertibleToU16StringView<S> is true.
2221    *
2222    * @param src the source for the new characters
2223    * @return a reference to this
2224    * @draft ICU 76
2225    */
2226   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2227   inline UnicodeString& operator+=(const S &src) {
2228     return doAppend(internal::toU16StringView(src));  // convert `src` to a u16string_view, then append
2229   }
2230 #endif  // U_HIDE_DRAFT_API
2231 
2232   /**
2233    * Append the characters
2234    * in `srcText` in the range
2235    * [`srcStart`, `srcStart + srcLength`) to the
2236    * end of the UnicodeString object. `srcText`
2237    * is not modified.
2238    * @param srcText the source for the new characters
2239    * @param srcStart the offset into `srcText` where new characters
2240    * will be obtained
2241    * @param srcLength the number of characters in `srcText` in
2242    * the append string
2243    * @return a reference to this
2244    * @stable ICU 2.0
2245    */
2246   inline UnicodeString& append(const UnicodeString& srcText,
2247             int32_t srcStart,
2248             int32_t srcLength);
2249 
2250   /**
2251    * Append the characters in `srcText` to the UnicodeString object.
2252    * `srcText` is not modified.
2253    * @param srcText the source for the new characters
2254    * @return a reference to this
2255    * @stable ICU 2.0
2256    */
2257   inline UnicodeString& append(const UnicodeString& srcText);
2258 
2259   /**
2260    * Append the characters in `srcChars` in the range
2261    * [`srcStart`, `srcStart + srcLength`) to the end of the
2262    * UnicodeString object.
2263    * `srcChars` is not modified.
2264    * @param srcChars the source for the new characters
2265    * @param srcStart the offset into `srcChars` where new characters
2266    * will be obtained
2267    * @param srcLength the number of characters in `srcChars` in
2268    *                  the append string; can be -1 if `srcChars` is NUL-terminated
2269    * @return a reference to this
2270    * @stable ICU 2.0
2271    */
2272   inline UnicodeString& append(const char16_t *srcChars,
2273             int32_t srcStart,
2274             int32_t srcLength);
2275 
2276   /**
2277    * Append the characters in `srcChars` to the UnicodeString object.
2278    * `srcChars` is not modified.
2279    * @param srcChars the source for the new characters
2280    * @param srcLength the number of Unicode characters in `srcChars`;
2281    *                  can be -1 if `srcChars` is NUL-terminated
2282    * @return a reference to this
2283    * @stable ICU 2.0
2284    */
2285   inline UnicodeString& append(ConstChar16Ptr srcChars,
2286             int32_t srcLength);
2287 
2288 #ifndef U_HIDE_DRAFT_API
2289   /**
2290    * Appends the characters in `src` to the UnicodeString object.
2291    * `src` is, or is implicitly convertible to, a std::u16string_view
2292    * or (if U_SIZEOF_WCHAR_T==2) a std::wstring_view; the template is enabled
2293    * only when ConvertibleToU16StringView<S> is true.
2294    *
2295    * @param src the source for the new characters
2296    * @return a reference to this
2297    * @draft ICU 76
2298    */
2299   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2300   inline UnicodeString& append(const S &src) {
2301     return doAppend(internal::toU16StringView(src));  // convert `src` to a u16string_view, then append
2302   }
2303 #endif  // U_HIDE_DRAFT_API
2304 
2305   /**
2306    * Append the code unit `srcChar` to the UnicodeString object.
2307    * @param srcChar the code unit to append
2308    * @return a reference to this
2309    * @stable ICU 2.0
2310    */
2311   inline UnicodeString& append(char16_t srcChar);
2312 
2313   /**
2314    * Append the code point `srcChar` to the UnicodeString object.
2315    * @param srcChar the code point to append
2316    * @return a reference to this
2317    * @stable ICU 2.0
2318    */
2319   UnicodeString& append(UChar32 srcChar);
2320 
2321 
2322   /* Insert operations */
2323 
2324   /**
2325    * Insert the characters in `srcText` in the range
2326    * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2327    * object at offset `start`. `srcText` is not modified.
2328    * @param start the offset where the insertion begins
2329    * @param srcText the source for the new characters
2330    * @param srcStart the offset into `srcText` where new characters
2331    * will be obtained
2332    * @param srcLength the number of characters in `srcText` in
2333    * the insert string
2334    * @return a reference to this
2335    * @stable ICU 2.0
2336    */
2337   inline UnicodeString& insert(int32_t start,
2338             const UnicodeString& srcText,
2339             int32_t srcStart,
2340             int32_t srcLength);
2341 
2342   /**
2343    * Insert the characters in `srcText` into the UnicodeString object
2344    * at offset `start`. `srcText` is not modified.
2345    * @param start the offset where the insertion begins
2346    * @param srcText the source for the new characters
2347    * @return a reference to this
2348    * @stable ICU 2.0
2349    */
2350   inline UnicodeString& insert(int32_t start,
2351             const UnicodeString& srcText);
2352 
2353   /**
2354    * Insert the characters in `srcChars` in the range
2355    * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
2356    *  object at offset `start`. `srcChars` is not modified.
2357    * @param start the offset at which the insertion begins
2358    * @param srcChars the source for the new characters
2359    * @param srcStart the offset into `srcChars` where new characters
2360    * will be obtained
2361    * @param srcLength the number of characters in `srcChars`
2362    * in the insert string
2363    * @return a reference to this
2364    * @stable ICU 2.0
2365    */
2366   inline UnicodeString& insert(int32_t start,
2367             const char16_t *srcChars,
2368             int32_t srcStart,
2369             int32_t srcLength);
2370 
2371   /**
2372    * Insert the characters in `srcChars` into the UnicodeString object
2373    * at offset `start`. `srcChars` is not modified.
2374    * @param start the offset where the insertion begins
2375    * @param srcChars the source for the new characters
2376    * @param srcLength the number of Unicode characters in srcChars.
2377    * @return a reference to this
2378    * @stable ICU 2.0
2379    */
2380   inline UnicodeString& insert(int32_t start,
2381             ConstChar16Ptr srcChars,
2382             int32_t srcLength);
2383 
2384   /**
2385    * Insert the code unit `srcChar` into the UnicodeString object at
2386    * offset `start`.
2387    * @param start the offset at which the insertion occurs
2388    * @param srcChar the code unit to insert
2389    * @return a reference to this
2390    * @stable ICU 2.0
2391    */
2392   inline UnicodeString& insert(int32_t start,
2393             char16_t srcChar);
2394 
2395   /**
2396    * Insert the code point `srcChar` into the UnicodeString object at
2397    * offset `start`.
2398    * @param start the offset at which the insertion occurs
2399    * @param srcChar the code point to insert
2400    * @return a reference to this
2401    * @stable ICU 2.0
2402    */
2403   inline UnicodeString& insert(int32_t start,
2404             UChar32 srcChar);
2405 
2406 
2407   /* Replace operations */
2408 
2409   /**
2410    * Replace the characters in the range
2411    * [`start`, `start + length`) with the characters in
2412    * `srcText` in the range
2413    * [`srcStart`, `srcStart + srcLength`).
2414    * `srcText` is not modified.
2415    * @param start the offset at which the replace operation begins
2416    * @param length the number of characters to replace. The character at
2417    * `start + length` is not modified.
2418    * @param srcText the source for the new characters
2419    * @param srcStart the offset into `srcText` where new characters
2420    * will be obtained
2421    * @param srcLength the number of characters in `srcText` in
2422    * the replace string
2423    * @return a reference to this
2424    * @stable ICU 2.0
2425    */
2426   inline UnicodeString& replace(int32_t start,
2427              int32_t length,
2428              const UnicodeString& srcText,
2429              int32_t srcStart,
2430              int32_t srcLength);
2431 
2432   /**
2433    * Replace the characters in the range
2434    * [`start`, `start + length`)
2435    * with the characters in `srcText`.  `srcText` is
2436    *  not modified.
2437    * @param start the offset at which the replace operation begins
2438    * @param length the number of characters to replace. The character at
2439    * `start + length` is not modified.
2440    * @param srcText the source for the new characters
2441    * @return a reference to this
2442    * @stable ICU 2.0
2443    */
2444   inline UnicodeString& replace(int32_t start,
2445              int32_t length,
2446              const UnicodeString& srcText);
2447 
2448   /**
2449    * Replace the characters in the range
2450    * [`start`, `start + length`) with the characters in
2451    * `srcChars` in the range
2452    * [`srcStart`, `srcStart + srcLength`). `srcChars`
2453    * is not modified.
2454    * @param start the offset at which the replace operation begins
2455    * @param length the number of characters to replace.  The character at
2456    * `start + length` is not modified.
2457    * @param srcChars the source for the new characters
2458    * @param srcStart the offset into `srcChars` where new characters
2459    * will be obtained
2460    * @param srcLength the number of characters in `srcChars`
2461    * in the replace string
2462    * @return a reference to this
2463    * @stable ICU 2.0
2464    */
2465   inline UnicodeString& replace(int32_t start,
2466              int32_t length,
2467              const char16_t *srcChars,
2468              int32_t srcStart,
2469              int32_t srcLength);
2470 
2471   /**
2472    * Replace the characters in the range
2473    * [`start`, `start + length`) with the characters in
2474    * `srcChars`.  `srcChars` is not modified.
2475    * @param start the offset at which the replace operation begins
2476    * @param length number of characters to replace.  The character at
2477    * `start + length` is not modified.
2478    * @param srcChars the source for the new characters
2479    * @param srcLength the number of Unicode characters in srcChars
2480    * @return a reference to this
2481    * @stable ICU 2.0
2482    */
2483   inline UnicodeString& replace(int32_t start,
2484              int32_t length,
2485              ConstChar16Ptr srcChars,
2486              int32_t srcLength);
2487 
2488   /**
2489    * Replace the characters in the range
2490    * [`start`, `start + length`) with the code unit
2491    * `srcChar`.
2492    * @param start the offset at which the replace operation begins
2493    * @param length the number of characters to replace.  The character at
2494    * `start + length` is not modified.
2495    * @param srcChar the new code unit
2496    * @return a reference to this
2497    * @stable ICU 2.0
2498    */
2499   inline UnicodeString& replace(int32_t start,
2500              int32_t length,
2501              char16_t srcChar);
2502 
2503   /**
2504    * Replace the characters in the range
2505    * [`start`, `start + length`) with the code point
2506    * `srcChar`.
2507    * @param start the offset at which the replace operation begins
2508    * @param length the number of characters to replace.  The character at
2509    * `start + length` is not modified.
2510    * @param srcChar the new code point
2511    * @return a reference to this
2512    * @stable ICU 2.0
2513    */
2514   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2515 
2516   /**
2517    * Replace the characters in the range [`start`, `limit`)
2518    * with the characters in `srcText`. `srcText` is not modified.
2519    * @param start the offset at which the replace operation begins
2520    * @param limit the offset immediately following the replace range
2521    * @param srcText the source for the new characters
2522    * @return a reference to this
2523    * @stable ICU 2.0
2524    */
2525   inline UnicodeString& replaceBetween(int32_t start,
2526                 int32_t limit,
2527                 const UnicodeString& srcText);
2528 
2529   /**
2530    * Replace the characters in the range [`start`, `limit`)
2531    * with the characters in `srcText` in the range
2532    * [`srcStart`, `srcLimit`). `srcText` is not modified.
2533    * @param start the offset at which the replace operation begins
2534    * @param limit the offset immediately following the replace range
2535    * @param srcText the source for the new characters
2536    * @param srcStart the offset into `srcText` where new characters
2537    * will be obtained
2538    * @param srcLimit the offset immediately following the range to copy
2539    * in `srcText`
2540    * @return a reference to this
2541    * @stable ICU 2.0
2542    */
2543   inline UnicodeString& replaceBetween(int32_t start,
2544                 int32_t limit,
2545                 const UnicodeString& srcText,
2546                 int32_t srcStart,
2547                 int32_t srcLimit);
2548 
2549   /**
2550    * Replace a substring of this object with the given text.
2551    * @param start the beginning index, inclusive; `0 <= start <= limit`.
2552    * @param limit the ending index, exclusive; `start <= limit <= length()`.
2553    * @param text the text to replace characters `start` to `limit - 1`
2554    * @stable ICU 2.0
2555    */
2556   virtual void handleReplaceBetween(int32_t start,
2557                                     int32_t limit,
2558                                     const UnicodeString& text) override;
2559 
2560   /**
2561    * Replaceable API
2562    * @return true if it has MetaData
2563    * @stable ICU 2.4
2564    */
2565   virtual UBool hasMetaData() const override;
2566 
2567   /**
2568    * Copy a substring of this object, retaining attribute (out-of-band)
2569    * information.  This method is used to duplicate or reorder substrings.
2570    * The destination index must not overlap the source range.
2571    *
2572    * @param start the beginning index, inclusive; `0 <= start <= limit`.
2573    * @param limit the ending index, exclusive; `start <= limit <= length()`.
2574    * @param dest the destination index.  The characters from
2575    *             `start..limit-1` will be copied to `dest`.
2576    * Implementations of this method may assume that `dest <= start ||
2577    * dest >= limit`.
2578    * @stable ICU 2.0
2579    */
2580   virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2581 
2582   /* Search and replace operations */
2583 
2584   /**
2585    * Replace all occurrences of characters in oldText with the characters
2586    * in newText
2587    * @param oldText the text containing the search text
2588    * @param newText the text containing the replacement text
2589    * @return a reference to this
2590    * @stable ICU 2.0
2591    */
2592   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2593                 const UnicodeString& newText);
2594 
2595   /**
2596    * Replace all occurrences of characters in oldText with characters
2597    * in newText
2598    * in the range [`start`, `start + length`).
2599    * @param start the start of the range in which replace will be performed
2600    * @param length the length of the range in which replace will be performed
2601    * @param oldText the text containing the search text
2602    * @param newText the text containing the replacement text
2603    * @return a reference to this
2604    * @stable ICU 2.0
2605    */
2606   inline UnicodeString& findAndReplace(int32_t start,
2607                 int32_t length,
2608                 const UnicodeString& oldText,
2609                 const UnicodeString& newText);
2610 
2611   /**
2612    * Replace all occurrences of characters in oldText in the range
2613    * [`oldStart`, `oldStart + oldLength`) with the characters
2614    * in newText in the range
2615    * [`newStart`, `newStart + newLength`)
2616    * in the range [`start`, `start + length`).
2617    * @param start the start of the range in which replace will be performed
2618    * @param length the length of the range in which replace will be performed
2619    * @param oldText the text containing the search text
2620    * @param oldStart the start of the search range in `oldText`
2621    * @param oldLength the length of the search range in `oldText`
2622    * @param newText the text containing the replacement text
2623    * @param newStart the start of the replacement range in `newText`
2624    * @param newLength the length of the replacement range in `newText`
2625    * @return a reference to this
2626    * @stable ICU 2.0
2627    */
2628   UnicodeString& findAndReplace(int32_t start,
2629                 int32_t length,
2630                 const UnicodeString& oldText,
2631                 int32_t oldStart,
2632                 int32_t oldLength,
2633                 const UnicodeString& newText,
2634                 int32_t newStart,
2635                 int32_t newLength);
2636 
2637 
2638   /* Remove operations */
2639 
2640   /**
2641    * Removes all characters from the UnicodeString object and clears the bogus flag.
2642    * This is the UnicodeString equivalent of std::string’s clear().
2643    *
2644    * @return a reference to this
2645    * @see setToBogus
2646    * @stable ICU 2.0
2647    */
2648   inline UnicodeString& remove();
2649 
2650   /**
2651    * Remove the characters in the range
2652    * [`start`, `start + length`) from the UnicodeString object.
2653    * @param start the offset of the first character to remove
2654    * @param length the number of characters to remove
2655    * @return a reference to this
2656    * @stable ICU 2.0
2657    */
2658   inline UnicodeString& remove(int32_t start,
2659                                int32_t length = static_cast<int32_t>(INT32_MAX));
2660 
2661   /**
2662    * Remove the characters in the range
2663    * [`start`, `limit`) from the UnicodeString object.
2664    * @param start the offset of the first character to remove
2665    * @param limit the offset immediately following the range to remove
2666    * @return a reference to this
2667    * @stable ICU 2.0
2668    */
2669   inline UnicodeString& removeBetween(int32_t start,
2670                                       int32_t limit = static_cast<int32_t>(INT32_MAX));
2671 
2672   /**
2673    * Retain only the characters in the range
2674    * [`start`, `limit`) from the UnicodeString object.
2675    * Removes characters before `start` and at and after `limit`.
2676    * @param start the offset of the first character to retain
2677    * @param limit the offset immediately following the range to retain
2678    * @return a reference to this
2679    * @stable ICU 4.4
2680    */
2681   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2682 
2683   /* Length operations */
2684 
2685   /**
2686    * Pad the start of this UnicodeString with the character `padChar`.
2687    * If the length of this UnicodeString is less than targetLength,
2688    * targetLength - length() copies of padChar will be added to the
2689    * beginning of this UnicodeString.
2690    * @param targetLength the desired length of the string
2691    * @param padChar the character to use for padding. Defaults to
2692    * space (U+0020)
2693    * @return true if the text was padded, false otherwise.
2694    * @stable ICU 2.0
2695    */
2696   UBool padLeading(int32_t targetLength,
2697                     char16_t padChar = 0x0020);
2698 
2699   /**
2700    * Pad the end of this UnicodeString with the character `padChar`.
2701    * If the length of this UnicodeString is less than targetLength,
2702    * targetLength - length() copies of padChar will be added to the
2703    * end of this UnicodeString.
2704    * @param targetLength the desired length of the string
2705    * @param padChar the character to use for padding. Defaults to
2706    * space (U+0020)
2707    * @return true if the text was padded, false otherwise.
2708    * @stable ICU 2.0
2709    */
2710   UBool padTrailing(int32_t targetLength,
2711                      char16_t padChar = 0x0020);
2712 
2713   /**
2714    * Truncate this UnicodeString to the `targetLength`.
2715    * @param targetLength the desired length of this UnicodeString.
2716    * @return true if the text was truncated, false otherwise
2717    * @stable ICU 2.0
2718    */
2719   inline UBool truncate(int32_t targetLength);
2720 
2721   /**
2722    * Trims leading and trailing whitespace from this UnicodeString.
2723    * @return a reference to this
2724    * @stable ICU 2.0
2725    */
2726   UnicodeString& trim();
2727 
2728   /* Miscellaneous operations */
2729 
2730   /**
2731    * Reverse this UnicodeString in place.
2732    * @return a reference to this
2733    * @stable ICU 2.0
2734    */
2735   inline UnicodeString& reverse();
2736 
2737   /**
2738    * Reverse the range [`start`, `start + length`) in
2739    * this UnicodeString.
2740    * @param start the start of the range to reverse
2741    * @param length the number of characters to reverse
2742    * @return a reference to this
2743    * @stable ICU 2.0
2744    */
2745   inline UnicodeString& reverse(int32_t start,
2746              int32_t length);
2747 
2748   /**
2749    * Convert the characters in this to UPPER CASE following the conventions of
2750    * the default locale.
2751    * @return A reference to this.
2752    * @stable ICU 2.0
2753    */
2754   UnicodeString& toUpper();
2755 
2756   /**
2757    * Convert the characters in this to UPPER CASE following the conventions of
2758    * a specific locale.
2759    * @param locale The locale containing the conventions to use.
2760    * @return A reference to this.
2761    * @stable ICU 2.0
2762    */
2763   UnicodeString& toUpper(const Locale& locale);
2764 
2765   /**
2766    * Convert the characters in this to lower case following the conventions of
2767    * the default locale.
2768    * @return A reference to this.
2769    * @stable ICU 2.0
2770    */
2771   UnicodeString& toLower();
2772 
2773   /**
2774    * Convert the characters in this to lower case following the conventions of
2775    * a specific locale.
2776    * @param locale The locale containing the conventions to use.
2777    * @return A reference to this.
2778    * @stable ICU 2.0
2779    */
2780   UnicodeString& toLower(const Locale& locale);
2781 
2782 #if !UCONFIG_NO_BREAK_ITERATION
2783 
2784   /**
2785    * Titlecase this string, convenience function using the default locale.
2786    *
2787    * Casing is locale-dependent and context-sensitive.
2788    * Titlecasing uses a break iterator to find the first characters of words
2789    * that are to be titlecased. It titlecases those characters and lowercases
2790    * all others.
2791    *
2792    * The titlecase break iterator can be provided to customize for arbitrary
2793    * styles, using rules and dictionaries beyond the standard iterators.
2794    * It may be more efficient to always provide an iterator to avoid
2795    * opening and closing one for each string.
2796    * If the break iterator passed in is null, the default Unicode algorithm
2797    * will be used to determine the titlecase positions.
2798    *
2799    * This function uses only the setText(), first() and next() methods of the
2800    * provided break iterator.
2801    *
2802    * @param titleIter A break iterator to find the first characters of words
2803    *                  that are to be titlecased.
2804    *                  If none is provided (0), then a standard titlecase
2805    *                  break iterator is opened.
2806    *                  Otherwise the provided iterator is set to the string's text.
2807    * @return A reference to this.
2808    * @stable ICU 2.1
2809    */
2810   UnicodeString &toTitle(BreakIterator *titleIter);
2811 
2812   /**
2813    * Titlecase this string.
2814    *
2815    * Casing is locale-dependent and context-sensitive.
2816    * Titlecasing uses a break iterator to find the first characters of words
2817    * that are to be titlecased. It titlecases those characters and lowercases
2818    * all others.
2819    *
2820    * The titlecase break iterator can be provided to customize for arbitrary
2821    * styles, using rules and dictionaries beyond the standard iterators.
2822    * It may be more efficient to always provide an iterator to avoid
2823    * opening and closing one for each string.
2824    * If the break iterator passed in is null, the default Unicode algorithm
2825    * will be used to determine the titlecase positions.
2826    *
2827    * This function uses only the setText(), first() and next() methods of the
2828    * provided break iterator.
2829    *
2830    * @param titleIter A break iterator to find the first characters of words
2831    *                  that are to be titlecased.
2832    *                  If none is provided (0), then a standard titlecase
2833    *                  break iterator is opened.
2834    *                  Otherwise the provided iterator is set to the string's text.
2835    * @param locale    The locale to consider.
2836    * @return A reference to this.
2837    * @stable ICU 2.1
2838    */
2839   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2840 
2841   /**
2842    * Titlecase this string, with options.
2843    *
2844    * Casing is locale-dependent and context-sensitive.
2845    * Titlecasing uses a break iterator to find the first characters of words
2846    * that are to be titlecased. It titlecases those characters and lowercases
2847    * all others. (This can be modified with options.)
2848    *
2849    * The titlecase break iterator can be provided to customize for arbitrary
2850    * styles, using rules and dictionaries beyond the standard iterators.
2851    * It may be more efficient to always provide an iterator to avoid
2852    * opening and closing one for each string.
2853    * If the break iterator passed in is null, the default Unicode algorithm
2854    * will be used to determine the titlecase positions.
2855    *
2856    * This function uses only the setText(), first() and next() methods of the
2857    * provided break iterator.
2858    *
2859    * @param titleIter A break iterator to find the first characters of words
2860    *                  that are to be titlecased.
2861    *                  If none is provided (0), then a standard titlecase
2862    *                  break iterator is opened.
2863    *                  Otherwise the provided iterator is set to the string's text.
2864    * @param locale    The locale to consider.
2865    * @param options   Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2866    *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2867    *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2868    * @return A reference to this.
2869    * @stable ICU 3.8
2870    */
2871   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2872 
2873 #endif
2874 
2875   /**
2876    * Case-folds the characters in this string.
2877    *
2878    * Case-folding is locale-independent and not context-sensitive,
2879    * but there is an option for whether to include or exclude mappings for dotted I
2880    * and dotless i that are marked with 'T' in CaseFolding.txt.
2881    *
2882    * The result may be longer or shorter than the original.
2883    *
2884    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2885    * @return A reference to this.
2886    * @stable ICU 2.0
2887    */
2888   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2889 
2890   //========================================
2891   // Access to the internal buffer
2892   //========================================
2893 
2894   /**
2895    * Get a read/write pointer to the internal buffer.
2896    * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2897    * writable, and is still owned by the UnicodeString object.
2898    * Calls to getBuffer(minCapacity) must not be nested, and
2899    * must be matched with calls to releaseBuffer(newLength).
2900    * If the string buffer was read-only or shared,
2901    * then it will be reallocated and copied.
2902    *
2903    * An attempted nested call will return 0, and will not further modify the
2904    * state of the UnicodeString object.
2905    * It also returns 0 if the string is bogus.
2906    *
2907    * The actual capacity of the string buffer may be larger than minCapacity.
2908    * getCapacity() returns the actual capacity.
2909    * For many operations, the full capacity should be used to avoid reallocations.
2910    *
2911    * While the buffer is "open" between getBuffer(minCapacity)
2912    * and releaseBuffer(newLength), the following applies:
2913    * - The string length is set to 0.
2914    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2915    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2916    * - You can read from and write to the returned buffer.
2917    * - The previous string contents will still be in the buffer;
2918    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
2919    *   If the length() was greater than minCapacity, then any contents after minCapacity
2920    *   may be lost.
2921    *   The buffer contents is not NUL-terminated by getBuffer().
2922    *   If length() < getCapacity() then you can terminate it by writing a NUL
2923    *   at index length().
2924    * - You must call releaseBuffer(newLength) in order to
2925    *   return to normal UnicodeString operation.
2926    *
2927    * @param minCapacity the minimum number of char16_ts that are to be available
2928    *        in the buffer, starting at the returned pointer;
2929    *        default to the current string capacity if minCapacity==-1
2930    * @return a writable pointer to the internal string buffer,
2931    *         or nullptr if an error occurs (nested calls, out of memory)
2932    *
2933    * @see releaseBuffer
2934    * @see getTerminatedBuffer()
2935    * @stable ICU 2.0
2936    */
2937   char16_t *getBuffer(int32_t minCapacity);
2938 
2939   /**
2940    * Release a read/write buffer on a UnicodeString object with an
2941    * "open" getBuffer(minCapacity).
2942    * This function must be called in a matched pair with getBuffer(minCapacity).
2943    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2944    *
2945    * It will set the string length to newLength, at most to the current capacity.
2946    * If newLength==-1 then it will set the length according to the
2947    * first NUL in the buffer, or to the capacity if there is no NUL.
2948    *
2949    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2950    *
2951    * @param newLength the new length of the UnicodeString object;
2952    *        defaults to the current capacity if newLength is greater than that;
2953    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2954    *        the current capacity of the string
2955    *
2956    * @see getBuffer(int32_t minCapacity)
2957    * @stable ICU 2.0
2958    */
2959   void releaseBuffer(int32_t newLength=-1);
2960 
2961   /**
2962    * Get a read-only pointer to the internal buffer.
2963    * This can be called at any time on a valid UnicodeString.
2964    *
2965    * It returns 0 if the string is bogus, or
2966    * during an "open" getBuffer(minCapacity).
2967    *
2968    * It can be called as many times as desired.
2969    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2970    * at which time the pointer is semantically invalidated and must not be used any more.
2971    *
2972    * The capacity of the buffer can be determined with getCapacity().
2973    * The part after length() may or may not be initialized and valid,
2974    * depending on the history of the UnicodeString object.
2975    *
2976    * The buffer contents is (probably) not NUL-terminated.
2977    * You can check if it is with
2978    * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
2979    * (See getTerminatedBuffer().)
2980    *
2981    * The buffer may reside in read-only memory. Its contents must not
2982    * be modified.
2983    *
2984    * @return a read-only pointer to the internal string buffer,
2985    *         or nullptr if the string is empty or bogus
2986    *
2987    * @see getBuffer(int32_t minCapacity)
2988    * @see getTerminatedBuffer()
2989    * @stable ICU 2.0
2990    */
2991   inline const char16_t *getBuffer() const;
2992 
2993   /**
2994    * Get a read-only pointer to the internal buffer,
2995    * making sure that it is NUL-terminated.
2996    * This can be called at any time on a valid UnicodeString.
2997    *
2998    * It returns 0 if the string is bogus, or
2999    * during an "open" getBuffer(minCapacity), or if the buffer cannot
3000    * be NUL-terminated (because memory allocation failed).
3001    *
3002    * It can be called as many times as desired.
3003    * The pointer that it returns will remain valid until the UnicodeString object is modified,
3004    * at which time the pointer is semantically invalidated and must not be used any more.
3005    *
3006    * The capacity of the buffer can be determined with getCapacity().
3007    * The part after length()+1 may or may not be initialized and valid,
3008    * depending on the history of the UnicodeString object.
3009    *
3010    * The buffer contents is guaranteed to be NUL-terminated.
3011    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
3012    * is written.
3013    * For this reason, this function is not const, unlike getBuffer().
3014    * Note that a UnicodeString may also contain NUL characters as part of its contents.
3015    *
3016    * The buffer may reside in read-only memory. Its contents must not
3017    * be modified.
3018    *
3019    * @return a read-only pointer to the internal string buffer,
3020    *         or 0 if the string is empty or bogus
3021    *
3022    * @see getBuffer(int32_t minCapacity)
3023    * @see getBuffer()
3024    * @stable ICU 2.2
3025    */
3026   const char16_t *getTerminatedBuffer();
3027 
3028 #ifndef U_HIDE_DRAFT_API
3029   /**
3030    * Converts to a std::u16string_view.
3031    *
3032    * @return a string view of the contents of this string
3033    * @draft ICU 76
3034    */
u16string_view()3035   inline operator std::u16string_view() const {
3036     return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3037   }
3038 
3039 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3040   /**
3041    * Converts to a std::wstring_view.
3042    *
3043    * Note: This should remain draft until C++ standard plans
3044    * about char16_t vs. wchar_t become clearer.
3045    *
3046    * @return a string view of the contents of this string
3047    * @draft ICU 76
3048    */
wstring_view()3049   inline operator std::wstring_view() const {
3050     const char16_t *p = getBuffer();
3051 #ifdef U_ALIASING_BARRIER
3052     U_ALIASING_BARRIER(p);
3053 #endif
3054     return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
3055   }
3056 #endif  // U_SIZEOF_WCHAR_T
3057 #endif  // U_HIDE_DRAFT_API
3058 
3059   //========================================
3060   // Constructors
3061   //========================================
3062 
3063   /** Construct an empty UnicodeString.
3064    * @stable ICU 2.0
3065    */
3066   inline UnicodeString();
3067 
3068   /**
3069    * Construct a UnicodeString with capacity to hold `capacity` char16_ts
3070    * @param capacity the number of char16_ts this UnicodeString should hold
3071    * before a resize is necessary; if count is greater than 0 and count
3072    * code points c take up more space than capacity, then capacity is adjusted
3073    * accordingly.
3074    * @param c is used to initially fill the string
3075    * @param count specifies how many code points c are to be written in the
3076    *              string
3077    * @stable ICU 2.0
3078    */
3079   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3080 
3081   /**
3082    * Single char16_t (code unit) constructor.
3083    *
3084    * It is recommended to mark this constructor "explicit" by
3085    * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
3086    * on the compiler command line or similar.
3087    * @param ch the character to place in the UnicodeString
3088    * @stable ICU 2.0
3089    */
3090   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
3091 
3092   /**
3093    * Single UChar32 (code point) constructor.
3094    *
3095    * It is recommended to mark this constructor "explicit" by
3096    * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
3097    * on the compiler command line or similar.
3098    * @param ch the character to place in the UnicodeString
3099    * @stable ICU 2.0
3100    */
3101   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
3102 
3103 #ifdef U_HIDE_DRAFT_API
3104   /**
3105    * char16_t* constructor.
3106    *
3107    * It is recommended to mark this constructor "explicit" by
3108    * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3109    * on the compiler command line or similar.
3110    *
3111    * Note, for string literals:
3112    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3113    * length determination:
3114    * \code
3115    * UnicodeString str(u"literal");
3116    * if (str == u"other literal") { ... }
3117    * \endcode
3118    *
3119    * @param text The characters to place in the UnicodeString.  `text`
3120    * must be NUL (U+0000) terminated.
3121    * @stable ICU 2.0
3122    */
UnicodeString(const char16_t * text)3123   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3124       UnicodeString(text, -1) {}
3125 #endif  // U_HIDE_DRAFT_API
3126 
3127 #if !U_CHAR16_IS_TYPEDEF && \
3128     (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
3129   /**
3130    * uint16_t * constructor.
3131    * Delegates to UnicodeString(const char16_t *, int32_t) with a length of -1.
3132    *
3133    * It is recommended to mark this constructor "explicit" by
3134    * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3135    * on the compiler command line or similar.
3136    *
3137    * Note, for string literals:
3138    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3139    * length determination:
3140    * \code
3141    * UnicodeString str(u"literal");
3142    * if (str == u"other literal") { ... }
3143    * \endcode
3144    *
3145    * @param text NUL-terminated UTF-16 string
3146    * @stable ICU 59
3147    */
UnicodeString(const uint16_t * text)3148   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3149       UnicodeString(ConstChar16Ptr(text), -1) {}
3150 #endif
3151 
3152 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
3153   /**
3154    * wchar_t * constructor.
3155    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3156    * Delegates to UnicodeString(const char16_t *, int32_t) with a length of -1.
3157    *
3158    * It is recommended to mark this constructor "explicit" by
3159    * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3160    * on the compiler command line or similar.
3161    *
3162    * Note, for string literals:
3163    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3164    * length determination:
3165    * \code
3166    * UnicodeString str(u"literal");
3167    * if (str == u"other literal") { ... }
3168    * \endcode
3169    *
3170    * @param text NUL-terminated UTF-16 string
3171    * @stable ICU 59
3172    */
UnicodeString(const wchar_t * text)3173   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3174       UnicodeString(ConstChar16Ptr(text), -1) {}
3175 #endif
3176 
3177   /**
3178    * nullptr_t constructor.
3179    * Effectively the same as the default constructor, makes an empty string object.
3180    *
3181    * It is recommended to mark this constructor "explicit" by
3182    * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3183    * on the compiler command line or similar.
3184    * @param text nullptr
3185    * @stable ICU 59
3186    */
3187   UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3188 
3189   /**
3190    * char16_t* constructor.
3191    *
3192    * Note, for string literals:
3193    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3194    * length determination:
3195    * \code
3196    * UnicodeString str(u"literal");
3197    * if (str == u"other literal") { ... }
3198    * \endcode
3199    *
3200    * @param text The characters to place in the UnicodeString.
3201    * @param textLength The number of Unicode characters in `text`
3202    * to copy.
3203    * @stable ICU 2.0
3204    */
3205   UnicodeString(const char16_t *text,
3206         int32_t textLength);
3207 
3208 #if !U_CHAR16_IS_TYPEDEF
3209   /**
3210    * uint16_t * constructor.
3211    * Delegates to UnicodeString(const char16_t *, int32_t).
3212    *
3213    * Note, for string literals:
3214    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3215    * length determination:
3216    * \code
3217    * UnicodeString str(u"literal");
3218    * if (str == u"other literal") { ... }
3219    * \endcode
3220    *
3221    * @param text UTF-16 string
3222    * @param textLength string length
3223    * @stable ICU 59
3224    */
UnicodeString(const uint16_t * text,int32_t textLength)3225   UnicodeString(const uint16_t *text, int32_t textLength) :
3226       UnicodeString(ConstChar16Ptr(text), textLength) {}
3227 #endif
3228 
3229 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3230   /**
3231    * wchar_t * constructor.
3232    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3233    * Delegates to UnicodeString(const char16_t *, int32_t).
3234    *
3235    * Note, for string literals:
3236    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3237    * length determination:
3238    * \code
3239    * UnicodeString str(u"literal");
3240    * if (str == u"other literal") { ... }
3241    * \endcode
3242    *
3243    * @param text UTF-16 string
3244    * @param textLength string length
3245    * @stable ICU 59
3246    */
UnicodeString(const wchar_t * text,int32_t textLength)3247   UnicodeString(const wchar_t *text, int32_t textLength) :
3248       UnicodeString(ConstChar16Ptr(text), textLength) {}
3249 #endif
3250 
3251   /**
3252    * nullptr_t constructor.
3253    * Effectively the same as the default constructor, makes an empty string object.
3254    * @param text nullptr
3255    * @param textLength ignored
3256    * @stable ICU 59
3257    */
3258   inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3259 
3260 #ifndef U_HIDE_DRAFT_API
3261   /**
3262    * Constructor from `text`
3263    * which is, or which is implicitly convertible to,
3264    * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
3265    * The string is bogus if the string view is too long.
3266    *
3267    * If you need a UnicodeString but need not copy the string view contents,
3268    * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
3269    *
3270    * @param text UTF-16 string
3271    * @draft ICU 76
3272    */
3273   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UnicodeString(const S & text)3274   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
3275     fUnion.fFields.fLengthAndFlags = kShortString;
3276     doAppend(internal::toU16StringViewNullable(text));
3277   }
3278 #endif  // U_HIDE_DRAFT_API
3279 
3280   /**
3281    * Readonly-aliasing char16_t* constructor.
3282    * The text will be used for the UnicodeString object, but
3283    * it will not be released when the UnicodeString is destroyed.
3284    * This has copy-on-write semantics:
3285    * When the string is modified, then the buffer is first copied into
3286    * newly allocated memory.
3287    * The aliased buffer is never modified.
3288    *
3289    * In an assignment to another UnicodeString, when using the copy constructor
3290    * or the assignment operator, the text will be copied.
3291    * When using fastCopyFrom(), the text will be aliased again,
3292    * so that both strings then alias the same readonly-text.
3293    *
3294    * Note, for string literals:
3295    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3296    * length determination:
3297    * \code
3298    * UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
3299    * if (alias == u"other literal") { ... }
3300    * \endcode
3301    *
3302    * @param isTerminated specifies if `text` is `NUL`-terminated.
3303    *                     This must be true if `textLength==-1`.
3304    * @param text The characters to alias for the UnicodeString.
3305    * @param textLength The number of Unicode characters in `text` to alias.
3306    *                   If -1, then this constructor will determine the length
3307    *                   by calling `u_strlen()`.
3308    * @stable ICU 2.0
3309    */
3310   UnicodeString(UBool isTerminated,
3311                 ConstChar16Ptr text,
3312                 int32_t textLength);
3313 
3314   /**
3315    * Writable-aliasing char16_t* constructor.
3316    * The text will be used for the UnicodeString object, but
3317    * it will not be released when the UnicodeString is destroyed.
3318    * This has write-through semantics:
3319    * For as long as the capacity of the buffer is sufficient, write operations
3320    * will directly affect the buffer. When more capacity is necessary, then
3321    * a new buffer will be allocated and the contents copied as with regularly
3322    * constructed strings.
3323    * In an assignment to another UnicodeString, the buffer will be copied.
3324    * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3325    * as the string buffer itself and will in this case not copy the contents.
3326    *
3327    * @param buffer The characters to alias for the UnicodeString.
3328    * @param buffLength The number of Unicode characters in `buffer` to alias.
3329    * @param buffCapacity The size of `buffer` in char16_ts.
3330    * @stable ICU 2.0
3331    */
3332   UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3333 
3334 #if !U_CHAR16_IS_TYPEDEF
3335   /**
3336    * Writable-aliasing uint16_t * constructor.
3337    * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3338    * @param buffer writable buffer of/for UTF-16 text
3339    * @param buffLength length of the current buffer contents
3340    * @param buffCapacity buffer capacity
3341    * @stable ICU 59
3342    */
UnicodeString(uint16_t * buffer,int32_t buffLength,int32_t buffCapacity)3343   UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3344       UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3345 #endif
3346 
3347 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3348   /**
3349    * Writable-aliasing wchar_t * constructor.
3350    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3351    * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3352    * @param buffer writable buffer of/for UTF-16 text
3353    * @param buffLength length of the current buffer contents
3354    * @param buffCapacity buffer capacity
3355    * @stable ICU 59
3356    */
UnicodeString(wchar_t * buffer,int32_t buffLength,int32_t buffCapacity)3357   UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3358       UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3359 #endif
3360 
3361   /**
3362    * Writable-aliasing nullptr_t constructor.
3363    * Effectively the same as the default constructor, makes an empty string object.
3364    * @param buffer nullptr
3365    * @param buffLength ignored
3366    * @param buffCapacity ignored
3367    * @stable ICU 59
3368    */
3369   inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3370 
3371 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3372 
3373   /**
3374    * char* constructor.
3375    * Uses the default converter (and thus depends on the ICU conversion code)
3376    * unless U_CHARSET_IS_UTF8 is set to 1.
3377    *
3378    * For ASCII (really "invariant character") strings it is more efficient to use
3379    * the constructor that takes a US_INV (for its enum EInvariant).
3380    *
3381    * Note, for string literals:
3382    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3383    * length determination:
3384    * \code
3385    * UnicodeString str(u"literal");
3386    * if (str == u"other literal") { ... }
3387    * \endcode
3388    *
3389    * It is recommended to mark this constructor "explicit" by
3390    * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
3391    * on the compiler command line or similar.
3392    * @param codepageData an array of bytes, null-terminated,
3393    *                     in the platform's default codepage.
3394    * @stable ICU 2.0
3395    */
3396   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3397 
3398   /**
3399    * char* constructor.
3400    * Uses the default converter (and thus depends on the ICU conversion code)
3401    * unless U_CHARSET_IS_UTF8 is set to 1.
3402    * @param codepageData an array of bytes in the platform's default codepage.
3403    * @param dataLength The number of bytes in `codepageData`.
3404    * @stable ICU 2.0
3405    */
3406   UnicodeString(const char *codepageData, int32_t dataLength);
3407 
3408 #endif
3409 
3410 #if !UCONFIG_NO_CONVERSION
3411 
3412   /**
3413    * char* constructor.
3414    * @param codepageData an array of bytes, null-terminated
3415    * @param codepage the encoding of `codepageData`.  The special
3416    * value 0 for `codepage` indicates that the text is in the
3417    * platform's default codepage.
3418    *
3419    * If `codepage` is an empty string (`""`),
3420    * then a simple conversion is performed on the codepage-invariant
3421    * subset ("invariant characters") of the platform encoding. See utypes.h.
3422    * Recommendation: For invariant-character strings use the constructor
3423    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3424    * because it avoids object code dependencies of UnicodeString on
3425    * the conversion code.
3426    *
3427    * @stable ICU 2.0
3428    */
3429   UnicodeString(const char *codepageData, const char *codepage);
3430 
3431   /**
3432    * char* constructor.
3433    * @param codepageData an array of bytes.
3434    * @param dataLength The number of bytes in `codepageData`.
3435    * @param codepage the encoding of `codepageData`.  The special
3436    * value 0 for `codepage` indicates that the text is in the
3437    * platform's default codepage.
3438    * If `codepage` is an empty string (`""`),
3439    * then a simple conversion is performed on the codepage-invariant
3440    * subset ("invariant characters") of the platform encoding. See utypes.h.
3441    * Recommendation: For invariant-character strings use the constructor
3442    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3443    * because it avoids object code dependencies of UnicodeString on
3444    * the conversion code.
3445    *
3446    * @stable ICU 2.0
3447    */
3448   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3449 
3450   /**
3451    * char * / UConverter constructor.
3452    * This constructor uses an existing UConverter object to
3453    * convert the codepage string to Unicode and construct a UnicodeString
3454    * from that.
3455    *
3456    * The converter is reset at first.
3457    * If the error code indicates a failure before this constructor is called,
3458    * or if an error occurs during conversion or construction,
3459    * then the string will be bogus.
3460    *
3461    * This function avoids the overhead of opening and closing a converter if
3462    * multiple strings are constructed.
3463    *
3464    * @param src input codepage string
3465    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3466    * @param cnv converter object (ucnv_resetToUnicode() will be called),
3467    *        can be nullptr for the default converter
3468    * @param errorCode normal ICU error code
3469    * @stable ICU 2.0
3470    */
3471   UnicodeString(
3472         const char *src, int32_t srcLength,
3473         UConverter *cnv,
3474         UErrorCode &errorCode);
3475 
3476 #endif
3477 
3478   /**
3479    * Constructs a Unicode string from an invariant-character char * string.
3480    * About invariant characters see utypes.h.
3481    * This constructor has no runtime dependency on conversion code and is
3482    * therefore recommended over ones taking a charset name string
3483    * (where the empty string "" indicates invariant-character conversion).
3484    *
3485    * Use the macro US_INV as the third, signature-distinguishing parameter.
3486    *
3487    * For example:
3488    * \code
3489    *     void fn(const char *s) {
3490    *       UnicodeString ustr(s, -1, US_INV);
3491    *       // use ustr ...
3492    *     }
3493    * \endcode
3494    *
3495    * Note, for string literals:
3496    * Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
3497    * length determination:
3498    * \code
3499    * UnicodeString str(u"literal");
3500    * if (str == u"other literal") { ... }
3501    * \endcode
3502    *
3503    * @param src String using only invariant characters.
3504    * @param textLength Length of src, or -1 if NUL-terminated.
3505    * @param inv Signature-distinguishing parameter, use US_INV.
3506    *
3507    * @see US_INV
3508    * @stable ICU 3.2
3509    */
3510   UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3511 
3512 
3513   /**
3514    * Copy constructor.
3515    *
3516    * Starting with ICU 2.4, the assignment operator and the copy constructor
3517    * allocate a new buffer and copy the buffer contents even for readonly aliases.
3518    * By contrast, the fastCopyFrom() function implements the old,
3519    * more efficient but less safe behavior
3520    * of making this string also a readonly alias to the same buffer.
3521    *
3522    * If the source object has an "open" buffer from getBuffer(minCapacity),
3523    * then the copy is an empty string.
3524    *
3525    * @param that The UnicodeString object to copy.
3526    * @stable ICU 2.0
3527    * @see fastCopyFrom
3528    */
3529   UnicodeString(const UnicodeString& that);
3530 
3531   /**
3532    * Move constructor; might leave src in bogus state.
3533    * This string will have the same contents and state that the source string had.
3534    * @param src source string
3535    * @stable ICU 56
3536    */
3537   UnicodeString(UnicodeString &&src) noexcept;
3538 
3539   /**
3540    * 'Substring' constructor from tail of source string.
3541    * @param src The UnicodeString object to copy.
3542    * @param srcStart The offset into `src` at which to start copying.
3543    * @stable ICU 2.2
3544    */
3545   UnicodeString(const UnicodeString& src, int32_t srcStart);
3546 
3547   /**
3548    * 'Substring' constructor from subrange of source string.
3549    * @param src The UnicodeString object to copy.
3550    * @param srcStart The offset into `src` at which to start copying.
3551    * @param srcLength The number of characters from `src` to copy.
3552    * @stable ICU 2.2
3553    */
3554   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3555 
3556   /**
3557    * Clone this object, an instance of a subclass of Replaceable.
3558    * Clones can be used concurrently in multiple threads.
3559    * If a subclass does not implement clone(), or if an error occurs,
3560    * then nullptr is returned.
3561    * The caller must delete the clone.
3562    *
3563    * @return a clone of this object
3564    *
3565    * @see Replaceable::clone
3566    * @see getDynamicClassID
3567    * @stable ICU 2.6
3568    */
3569   virtual UnicodeString *clone() const override;
3570 
3571   /** Destructor.
3572    * @stable ICU 2.0
3573    */
3574   virtual ~UnicodeString();
3575 
3576 #ifndef U_HIDE_DRAFT_API
3577   /**
3578    * Readonly-aliasing factory method.
3579    * Aliases the same buffer as the input `text`
3580    * which is, or which is implicitly convertible to,
3581    * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
3582    * The string is bogus if the string view is too long.
3583    *
3584    * The text will be used for the UnicodeString object, but
3585    * it will not be released when the UnicodeString is destroyed.
3586    * This has copy-on-write semantics:
3587    * When the string is modified, then the buffer is first copied into
3588    * newly allocated memory.
3589    * The aliased buffer is never modified.
3590    *
3591    * In an assignment to another UnicodeString, when using the copy constructor
3592    * or the assignment operator, the text will be copied.
3593    * When using fastCopyFrom(), the text will be aliased again,
3594    * so that both strings then alias the same readonly-text.
3595    *
3596    * @param text The string view to alias for the UnicodeString.
3597    * @draft ICU 76
3598    */
3599   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
readOnlyAlias(const S & text)3600   static inline UnicodeString readOnlyAlias(const S &text) {
3601     return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3602   }
3603 
3604   /**
3605    * Readonly-aliasing factory method.
3606    * Aliases the same buffer as the input `text`.
3607    *
3608    * The text will be used for the UnicodeString object, but
3609    * it will not be released when the UnicodeString is destroyed.
3610    * This has copy-on-write semantics:
3611    * When the string is modified, then the buffer is first copied into
3612    * newly allocated memory.
3613    * The aliased buffer is never modified.
3614    *
3615    * In an assignment to another UnicodeString, when using the copy constructor
3616    * or the assignment operator, the text will be copied.
3617    * When using fastCopyFrom(), the text will be aliased again,
3618    * so that both strings then alias the same readonly-text.
3619    *
3620    * @param text The UnicodeString to alias.
3621    * @draft ICU 76
3622    */
static inline UnicodeString
readOnlyAlias(const UnicodeString &text) {
  // Overload for an existing UnicodeString: forwards directly to
  // readOnlyAliasFromUnicodeString().
  return readOnlyAliasFromUnicodeString(text);
}
3626 #endif  // U_HIDE_DRAFT_API
3627 
3628   /**
3629    * Create a UnicodeString from a UTF-8 string.
3630    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3631    * Calls u_strFromUTF8WithSub().
3632    *
3633    * @param utf8 UTF-8 input string.
3634    *             Note that a StringPiece can be implicitly constructed
3635    *             from a std::string or a NUL-terminated const char * string.
3636    * @return A UnicodeString with equivalent UTF-16 contents.
3637    * @see toUTF8
3638    * @see toUTF8String
3639    * @stable ICU 4.2
3640    */
3641   static UnicodeString fromUTF8(StringPiece utf8);
3642 
3643   /**
3644    * Create a UnicodeString from a UTF-32 string.
3645    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3646    * Calls u_strFromUTF32WithSub().
3647    *
3648    * @param utf32 UTF-32 input string. Must not be nullptr.
3649    * @param length Length of the input string, or -1 if NUL-terminated.
3650    * @return A UnicodeString with equivalent UTF-16 contents.
3651    * @see toUTF32
3652    * @stable ICU 4.2
3653    */
3654   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3655 
3656   /* Miscellaneous operations */
3657 
3658   /**
3659    * Unescape a string of characters and return a string containing
3660    * the result.  The following escape sequences are recognized:
3661    *
3662    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3663    * \\Uhhhhhhhh   8 hex digits
3664    * \\xhh         1-2 hex digits
3665    * \\ooo         1-3 octal digits; o in [0-7]
3666    * \\cX          control-X; X is masked with 0x1F
3667    *
3668    * as well as the standard ANSI C escapes:
3669    *
3670    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3671    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3672    * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3673    *
3674    * Anything else following a backslash is generically escaped.  For
3675    * example, "[a\\-z]" returns "[a-z]".
3676    *
3677    * If an escape sequence is ill-formed, this method returns an empty
3678    * string.  An example of an ill-formed sequence is "\\u" followed by
3679    * fewer than 4 hex digits.
3680    *
3681    * This function is similar to u_unescape() but not identical to it.
3682    * The latter takes a source char*, so it does escape recognition
3683    * and also invariant conversion.
3684    *
3685    * @return a string with backslash escapes interpreted, or an
3686    * empty string on error.
3687    * @see UnicodeString#unescapeAt()
3688    * @see u_unescape()
3689    * @see u_unescapeAt()
3690    * @stable ICU 2.0
3691    */
3692   UnicodeString unescape() const;
3693 
3694   /**
3695    * Unescape a single escape sequence and return the represented
3696    * character.  See unescape() for a listing of the recognized escape
3697    * sequences.  The character at offset-1 is assumed (without
3698    * checking) to be a backslash.  If the escape sequence is
3699    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3700    * returned.
3701    *
3702    * @param offset an input output parameter.  On input, it is the
3703    * offset into this string where the escape sequence is located,
3704    * after the initial backslash.  On output, it is advanced after the
3705    * last character parsed.  On error, it is not advanced at all.
3706    * @return the character represented by the escape sequence at
3707    * offset, or U_SENTINEL=-1 on error.
3708    * @see UnicodeString#unescape()
3709    * @see u_unescape()
3710    * @see u_unescapeAt()
3711    * @stable ICU 2.0
3712    */
3713   UChar32 unescapeAt(int32_t &offset) const;
3714 
3715   /**
3716    * ICU "poor man's RTTI", returns a UClassID for this class.
3717    *
3718    * @stable ICU 2.2
3719    */
3720   static UClassID U_EXPORT2 getStaticClassID();
3721 
3722   /**
3723    * ICU "poor man's RTTI", returns a UClassID for the actual class.
3724    *
3725    * @stable ICU 2.2
3726    */
3727   virtual UClassID getDynamicClassID() const override;
3728 
3729   //========================================
3730   // Implementation methods
3731   //========================================
3732 
3733 protected:
3734   /**
3735    * Implement Replaceable::getLength() (see jitterbug 1027).
3736    * @stable ICU 2.4
3737    */
3738   virtual int32_t getLength() const override;
3739 
3740   /**
3741    * The change in Replaceable to use virtual getCharAt() allows
3742    * UnicodeString::charAt() to be inline again (see jitterbug 709).
3743    * @stable ICU 2.4
3744    */
3745   virtual char16_t getCharAt(int32_t offset) const override;
3746 
3747   /**
3748    * The change in Replaceable to use virtual getChar32At() allows
3749    * UnicodeString::char32At() to be inline again (see jitterbug 709).
3750    * @stable ICU 2.4
3751    */
3752   virtual UChar32 getChar32At(int32_t offset) const override;
3753 
3754 private:
3755   static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3756   static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3757 
3758   // For char* constructors. Could be made public.
3759   UnicodeString &setToUTF8(StringPiece utf8);
3760   // For extract(char*).
3761   // We could make a toUTF8(target, capacity, errorCode) public but not
3762   // this version: New API will be cleaner if we make callers create substrings
3763   // rather than having start+length on every method,
3764   // and it should take a UErrorCode&.
3765   int32_t
3766   toUTF8(int32_t start, int32_t len,
3767          char *target, int32_t capacity) const;
3768 
3769   /**
3770    * Internal string contents comparison, called by operator==.
3771    * Requires: this & text not bogus and have same lengths.
3772    */
doEquals(const UnicodeString & text,int32_t len)3773   inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3774     return doEquals(text.getArrayStart(), len);
3775   }
3776   UBool doEquals(const char16_t *text, int32_t len) const;
3777   // UnicodeString overload: a bogus srcText matches only a bogus *this; otherwise srcStart/srcLength are pinned to srcText and the char16_t* overload decides.
3778   inline UBool
3779   doEqualsSubstring(int32_t start,
3780            int32_t length,
3781            const UnicodeString& srcText,
3782            int32_t srcStart,
3783            int32_t srcLength) const;
3784 
3785   UBool doEqualsSubstring(int32_t start,
3786            int32_t length,
3787            const char16_t *srcChars,
3788            int32_t srcStart,
3789            int32_t srcLength) const;
3790   // UnicodeString overload: for a bogus srcText returns 0 if *this is bogus too, else 1; otherwise pins srcStart/srcLength to srcText and forwards to the char16_t* overload.
3791   inline int8_t
3792   doCompare(int32_t start,
3793            int32_t length,
3794            const UnicodeString& srcText,
3795            int32_t srcStart,
3796            int32_t srcLength) const;
3797 
3798   int8_t doCompare(int32_t start,
3799            int32_t length,
3800            const char16_t *srcChars,
3801            int32_t srcStart,
3802            int32_t srcLength) const;
3803   // UnicodeString overload: for a bogus srcText returns 0 if *this is bogus too, else 1; otherwise pins srcStart/srcLength to srcText and forwards to the char16_t* overload.
3804   inline int8_t
3805   doCompareCodePointOrder(int32_t start,
3806                           int32_t length,
3807                           const UnicodeString& srcText,
3808                           int32_t srcStart,
3809                           int32_t srcLength) const;
3810 
3811   int8_t doCompareCodePointOrder(int32_t start,
3812                                  int32_t length,
3813                                  const char16_t *srcChars,
3814                                  int32_t srcStart,
3815                                  int32_t srcLength) const;
3816   // UnicodeString overload: for a bogus srcText returns 0 if *this is bogus too, else 1; otherwise pins srcStart/srcLength to srcText and forwards to the char16_t* overload with the same options.
3817   inline int8_t
3818   doCaseCompare(int32_t start,
3819                 int32_t length,
3820                 const UnicodeString &srcText,
3821                 int32_t srcStart,
3822                 int32_t srcLength,
3823                 uint32_t options) const;
3824 
3825   int8_t
3826   doCaseCompare(int32_t start,
3827                 int32_t length,
3828                 const char16_t *srcChars,
3829                 int32_t srcStart,
3830                 int32_t srcLength,
3831                 uint32_t options) const;
3832 
3833   int32_t doIndexOf(char16_t c,
3834             int32_t start,
3835             int32_t length) const;
3836 
3837   int32_t doIndexOf(UChar32 c,
3838                         int32_t start,
3839                         int32_t length) const;
3840 
3841   int32_t doLastIndexOf(char16_t c,
3842                 int32_t start,
3843                 int32_t length) const;
3844 
3845   int32_t doLastIndexOf(UChar32 c,
3846                             int32_t start,
3847                             int32_t length) const;
3848 
3849   void doExtract(int32_t start,
3850          int32_t length,
3851          char16_t *dst,
3852          int32_t dstStart) const;
3853 
3854   inline void doExtract(int32_t start,
3855          int32_t length,
3856          UnicodeString& target) const;
3857 
3858   inline char16_t doCharAt(int32_t offset)  const;
3859 
3860   UnicodeString& doReplace(int32_t start,
3861                int32_t length,
3862                const UnicodeString& srcText,
3863                int32_t srcStart,
3864                int32_t srcLength);
3865 
3866   UnicodeString& doReplace(int32_t start,
3867                int32_t length,
3868                const char16_t *srcChars,
3869                int32_t srcStart,
3870                int32_t srcLength);
3871   UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3872 
3873   UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3874   UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3875   UnicodeString& doAppend(std::u16string_view src);
3876 
3877   UnicodeString& doReverse(int32_t start,
3878                int32_t length);
3879 
3880   // calculate hash code
3881   int32_t doHashCode() const;
3882 
3883   // get pointer to start of array (the in-object stack buffer or fArray, selected by kUsingStackBuffer)
3884   // these do not check for kIsBogus or kOpenGetBuffer, unlike the public getBuffer() function
3885   inline char16_t* getArrayStart();
3886   inline const char16_t* getArrayStart() const;
3887 
3888   inline UBool hasShortLength() const;    // true iff fLengthAndFlags >= 0, i.e. the length is held in its bits above the flags
3889   inline int32_t getShortLength() const;  // fLengthAndFlags >> kLengthShift; meaningful only when hasShortLength()
3890 
3891   // A UnicodeString object (not necessarily its current buffer)
3892   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3893   inline UBool isWritable() const;
3894 
3895   // Is the current buffer writable? (not bogus, no open getBuffer(), not read-only, and refCount()==1 if ref-counted)
3896   inline UBool isBufferWritable() const;
3897 
3898   // None of the following does releaseArray().
3899   inline void setZeroLength();
3900   inline void setShortLength(int32_t len);
3901   inline void setLength(int32_t len);
3902   inline void setToEmpty();
3903   inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3904 
3905   // allocate the array; result may be the stack buffer
3906   // sets refCount to 1 if appropriate
3907   // sets fArray, fCapacity, and flags
3908   // sets length to 0
3909   // returns boolean for success or failure
3910   UBool allocate(int32_t capacity);
3911 
3912   // release the array if owned
3913   void releaseArray();
3914 
3915   // turn a bogus string into an empty one
3916   void unBogus();
3917 
3918   // implements assignment operator, copy constructor, and fastCopyFrom()
3919   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3920 
3921   // Copies just the fields without memory management.
3922   void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3923 
3924   // Pin indices to acceptable values: start is clamped to [0, length()], the length argument to [0, length() - start].
3925   inline void pinIndex(int32_t& start) const;
3926   inline void pinIndices(int32_t& start,
3927                          int32_t& length) const;
3928 
3929 #if !UCONFIG_NO_CONVERSION
3930 
3931   /* Internal extract() using UConverter. */
3932   int32_t doExtract(int32_t start, int32_t length,
3933                     char *dest, int32_t destCapacity,
3934                     UConverter *cnv,
3935                     UErrorCode &errorCode) const;
3936 
3937   /*
3938    * Real constructor for converting from codepage data.
3939    * It assumes that it is called with !fRefCounted.
3940    *
3941    * If `codepage==0`, then the default converter
3942    * is used for the platform encoding.
3943    * If `codepage` is an empty string (`""`),
3944    * then a simple conversion is performed on the codepage-invariant
3945    * subset ("invariant characters") of the platform encoding. See utypes.h.
3946    */
3947   void doCodepageCreate(const char *codepageData,
3948                         int32_t dataLength,
3949                         const char *codepage);
3950 
3951   /*
3952    * Worker function for creating a UnicodeString from
3953    * a codepage string using a UConverter.
3954    */
3955   void
3956   doCodepageCreate(const char *codepageData,
3957                    int32_t dataLength,
3958                    UConverter *converter,
3959                    UErrorCode &status);
3960 
3961 #endif
3962 
3963   /*
3964    * This function is called when write access to the array
3965    * is necessary.
3966    *
3967    * We need to make a copy of the array if
3968    * the buffer is read-only, or
3969    * the buffer is refCounted (shared), and refCount>1, or
3970    * the buffer is too small.
3971    *
3972    * Return false if memory could not be allocated.
3973    */
3974   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3975                            int32_t growCapacity = -1,
3976                            UBool doCopyArray = true,
3977                            int32_t** pBufferToDelete = nullptr,
3978                            UBool forceClone = false);
3979 
3980   /**
3981    * Common function for UnicodeString case mappings.
3982    * The stringCaseMapper has the same type UStringCaseMapper
3983    * as in ustr_imp.h for ustrcase_map().
3984    */
3985   UnicodeString &
3986   caseMap(int32_t caseLocale, uint32_t options,
3987 #if !UCONFIG_NO_BREAK_ITERATION
3988           BreakIterator *iter,
3989 #endif
3990           UStringCaseMapper *stringCaseMapper);
3991 
3992   // ref counting
3993   void addRef();
3994   int32_t removeRef();
3995   int32_t refCount() const;
3996 
3997   // constants: stack buffer size, sentinel values, and the bit layout of fLengthAndFlags
3998   enum {
3999     /**
4000      * Size of stack buffer for short strings: the object size minus one pointer and 2 bytes, divided by U_SIZEOF_UCHAR.
4001      * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
4002      * @see UNISTR_OBJECT_SIZE
4003      */
4004     US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4005     kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4006     kInvalidHashCode=0, // invalid hash code
4007     kEmptyHashCode=1, // hash code for empty string
4008 
4009     // bit flag values for fLengthAndFlags
4010     kIsBogus=1,         // this string is bogus, i.e., not valid or nullptr
4011     kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4012     kRefCounted=4,      // there is a refCount field before the characters in fArray
4013     kBufferIsReadonly=8,// do not write to this buffer
4014     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
4015                         // and releaseBuffer(newLength) must be called
4016     kAllStorageFlags=0x1f, // mask covering all five flag bits above (1|2|4|8|16)
4017 
4018     kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
4019     kLength1=1<<kLengthShift, // value of the lowest short-length bit within fLengthAndFlags
4020     kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
4021     kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength
4022 
4023     // combined values for convenience
4024     kShortString=kUsingStackBuffer,
4025     kLongString=kRefCounted,
4026     kReadonlyAlias=kBufferIsReadonly,
4027     kWritableAlias=0 // no storage flag set
4028   };
4029 
4030   friend class UnicodeStringAppendable;
4031 
4032   union StackBufferOrFields;        // forward declaration necessary before friend declaration
4033   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4034 
4035   /*
4036    * The following are all the class fields that are stored
4037    * in each UnicodeString object.
4038    * Note that UnicodeString has virtual functions,
4039    * therefore there is an implicit vtable pointer
4040    * as the first real field.
4041    * The fields should be aligned such that no padding is necessary.
4042    * On 32-bit machines, the size should be 32 bytes,
4043    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4044    *
4045    * We use a hack to achieve this.
4046    *
4047    * With at least some compilers, each of the following is forced to
4048    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4049    * rounded up with additional padding if the fields do not already fit that requirement:
4050    * - sizeof(class UnicodeString)
4051    * - offsetof(UnicodeString, fUnion)
4052    * - sizeof(fUnion)
4053    * - sizeof(fStackFields)
4054    *
4055    * We optimize for the longest possible internal buffer for short strings.
4056    * fUnion.fStackFields begins with 2 bytes for storage flags
4057    * and the length of relatively short strings,
4058    * followed by the buffer for short string contents.
4059    * There is no padding inside fStackFields.
4060    *
4061    * Heap-allocated and aliased strings use fUnion.fFields.
4062    * Both fStackFields and fFields must begin with the same fields for flags and short length,
4063    * that is, those must have the same memory offsets inside the object,
4064    * because the flags must be inspected in order to decide which half of fUnion is being used.
4065    * We assume that the compiler does not reorder the fields.
4066    *
4067    * (Padding at the end of fFields is ok:
4068    * As long as it is no larger than fStackFields, it is not wasted space.)
4069    *
4070    * For some of the history of the UnicodeString class fields layout, see
4071    * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4072    * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4073    * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4074    */
4075   // (implicit) *vtable;
4076   union StackBufferOrFields {
4077     // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4078     // Each struct of the union must begin with fLengthAndFlags.
4079     struct {
4080       int16_t fLengthAndFlags;          // bit fields: see constants above
4081       char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
4082     } fStackFields;
4083     struct {
4084       int16_t fLengthAndFlags;          // bit fields: see constants above
4085       int32_t fLength;    // number of characters in fArray when the length is not held in fLengthAndFlags (i.e. it exceeds kMaxShortLength); else undefined
4086       int32_t fCapacity;  // capacity of fArray (in char16_ts)
4087       // array pointer last to minimize padding for machines with P128 data model
4088       // or pointer sizes that are not a power of 2
4089       char16_t   *fArray;    // the Unicode data
4090     } fFields;
4091   } fUnion;
4092 };
4093 
4094 /**
4095  * Creates a new UnicodeString from the concatenation of two others.
4096  *
4097  * @param s1 The first string to be copied to the new one.
4098  * @param s2 The second string to be copied to the new one, after s1.
4099  * @return UnicodeString(s1).append(s2)
4100  * @stable ICU 2.8
4101  */
4102 U_COMMON_API UnicodeString U_EXPORT2
4103 operator+ (const UnicodeString &s1, const UnicodeString &s2);
4104 
4105 #ifndef U_HIDE_DRAFT_API
4106 /**
4107  * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
4108  * which is, or which is implicitly convertible to,
4109  * a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
4110  *
4111  * @param s1 The string to be copied to the new one.
4112  * @param s2 The string view to be copied to the new string, after s1.
4113  * @return UnicodeString(s1).append(s2)
4114  * @draft ICU 76
4115  */
4116 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4117 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
4118   return unistr_internalConcat(s1, internal::toU16StringView(s2));  // keep the argument dependent on S: unistr_internalConcat is declared below, so it must be looked up at instantiation
4119 }
4120 #endif  // U_HIDE_DRAFT_API
4121 
4122 #ifndef U_FORCE_HIDE_INTERNAL_API
4123 /** @internal */
4124 U_COMMON_API UnicodeString U_EXPORT2
4125 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4126 #endif
4127 
4128 //========================================
4129 // Inline members
4130 //========================================
4131 
4132 //========================================
4133 // Privates
4134 //========================================
4135 
4136 inline void
pinIndex(int32_t & start)4137 UnicodeString::pinIndex(int32_t& start) const
4138 {
4139   // pin index
4140   if(start < 0) {
4141     start = 0;
4142   } else if(start > length()) {
4143     start = length();
4144   }
4145 }
4146 
4147 inline void
pinIndices(int32_t & start,int32_t & _length)4148 UnicodeString::pinIndices(int32_t& start,
4149                           int32_t& _length) const
4150 {
4151   // pin indices
4152   int32_t len = length();
4153   if(start < 0) {
4154     start = 0;
4155   } else if(start > len) {
4156     start = len;
4157   }
4158   if(_length < 0) {
4159     _length = 0;
4160   } else if(_length > (len - start)) {
4161     _length = (len - start);
4162   }
4163 }
4164 
4165 inline char16_t*
getArrayStart()4166 UnicodeString::getArrayStart() {
4167   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4168     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4169 }
4170 
4171 inline const char16_t*
getArrayStart()4172 UnicodeString::getArrayStart() const {
4173   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4174     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4175 }
4176 
4177 //========================================
4178 // Default constructor
4179 //========================================
4180 
4181 inline
UnicodeString()4182 UnicodeString::UnicodeString() {
4183   fUnion.fStackFields.fLengthAndFlags=kShortString;
4184 }
4185 
UnicodeString(const std::nullptr_t)4186 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4187   fUnion.fStackFields.fLengthAndFlags=kShortString;
4188 }
4189 
UnicodeString(const std::nullptr_t,int32_t)4190 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4191   fUnion.fStackFields.fLengthAndFlags=kShortString;
4192 }
4193 
UnicodeString(std::nullptr_t,int32_t,int32_t)4194 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4195   fUnion.fStackFields.fLengthAndFlags=kShortString;
4196 }
4197 
4198 //========================================
4199 // Read-only implementation methods
4200 //========================================
4201 inline UBool
hasShortLength()4202 UnicodeString::hasShortLength() const {
4203   return fUnion.fFields.fLengthAndFlags>=0;
4204 }
4205 
4206 inline int32_t
getShortLength()4207 UnicodeString::getShortLength() const {
4208   // fLengthAndFlags must be non-negative -> short length >= 0
4209   // and arithmetic or logical shift does not matter.
4210   return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4211 }
4212 
4213 inline int32_t
length()4214 UnicodeString::length() const {
4215   return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4216 }
4217 
4218 inline int32_t
getCapacity()4219 UnicodeString::getCapacity() const {
4220   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4221     US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4222 }
4223 
4224 inline int32_t
hashCode()4225 UnicodeString::hashCode() const
4226 { return doHashCode(); }
4227 
4228 inline UBool
isBogus()4229 UnicodeString::isBogus() const
4230 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4231 
4232 inline UBool
isWritable()4233 UnicodeString::isWritable() const
4234 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4235 
4236 inline UBool
isBufferWritable()4237 UnicodeString::isBufferWritable() const
4238 {
4239   return
4240       !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4241       (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4242 }
4243 
4244 inline const char16_t *
getBuffer()4245 UnicodeString::getBuffer() const {
4246   if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4247     return nullptr;
4248   } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4249     return fUnion.fStackFields.fBuffer;
4250   } else {
4251     return fUnion.fFields.fArray;
4252   }
4253 }
4254 
4255 //========================================
4256 // Read-only alias methods
4257 //========================================
4258 inline int8_t
doCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4259 UnicodeString::doCompare(int32_t start,
4260               int32_t thisLength,
4261               const UnicodeString& srcText,
4262               int32_t srcStart,
4263               int32_t srcLength) const
4264 {
4265   if(srcText.isBogus()) {
4266     return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4267   } else {
4268     srcText.pinIndices(srcStart, srcLength);
4269     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4270   }
4271 }
4272 
4273 inline UBool
doEqualsSubstring(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4274 UnicodeString::doEqualsSubstring(int32_t start,
4275               int32_t thisLength,
4276               const UnicodeString& srcText,
4277               int32_t srcStart,
4278               int32_t srcLength) const
4279 {
4280   if(srcText.isBogus()) {
4281     return isBogus();
4282   } else {
4283     srcText.pinIndices(srcStart, srcLength);
4284     return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4285   }
4286 }
4287 
4288 inline bool
4289 UnicodeString::operator== (const UnicodeString& text) const
4290 {
4291   if(isBogus()) {
4292     return text.isBogus();
4293   } else {
4294     int32_t len = length(), textLength = text.length();
4295     return !text.isBogus() && len == textLength && doEquals(text, len);
4296   }
4297 }
4298 
4299 inline bool
4300 UnicodeString::operator!= (const UnicodeString& text) const
4301 { return (! operator==(text)); }
4302 
4303 inline UBool
4304 UnicodeString::operator> (const UnicodeString& text) const
4305 { return doCompare(0, length(), text, 0, text.length()) == 1; }
4306 
4307 inline UBool
4308 UnicodeString::operator< (const UnicodeString& text) const
4309 { return doCompare(0, length(), text, 0, text.length()) == -1; }
4310 
4311 inline UBool
4312 UnicodeString::operator>= (const UnicodeString& text) const
4313 { return doCompare(0, length(), text, 0, text.length()) != -1; }
4314 
4315 inline UBool
4316 UnicodeString::operator<= (const UnicodeString& text) const
4317 { return doCompare(0, length(), text, 0, text.length()) != 1; }
4318 
4319 inline int8_t
compare(const UnicodeString & text)4320 UnicodeString::compare(const UnicodeString& text) const
4321 { return doCompare(0, length(), text, 0, text.length()); }
4322 
4323 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText)4324 UnicodeString::compare(int32_t start,
4325                int32_t _length,
4326                const UnicodeString& srcText) const
4327 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4328 
4329 inline int8_t
compare(ConstChar16Ptr srcChars,int32_t srcLength)4330 UnicodeString::compare(ConstChar16Ptr srcChars,
4331                int32_t srcLength) const
4332 { return doCompare(0, length(), srcChars, 0, srcLength); }
4333 
4334 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4335 UnicodeString::compare(int32_t start,
4336                int32_t _length,
4337                const UnicodeString& srcText,
4338                int32_t srcStart,
4339                int32_t srcLength) const
4340 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4341 
4342 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars)4343 UnicodeString::compare(int32_t start,
4344                int32_t _length,
4345                const char16_t *srcChars) const
4346 { return doCompare(start, _length, srcChars, 0, _length); }
4347 
4348 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4349 UnicodeString::compare(int32_t start,
4350                int32_t _length,
4351                const char16_t *srcChars,
4352                int32_t srcStart,
4353                int32_t srcLength) const
4354 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4355 
4356 inline int8_t
compareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4357 UnicodeString::compareBetween(int32_t start,
4358                   int32_t limit,
4359                   const UnicodeString& srcText,
4360                   int32_t srcStart,
4361                   int32_t srcLimit) const
4362 { return doCompare(start, limit - start,
4363            srcText, srcStart, srcLimit - srcStart); }
4364 
4365 inline int8_t
doCompareCodePointOrder(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4366 UnicodeString::doCompareCodePointOrder(int32_t start,
4367                                        int32_t thisLength,
4368                                        const UnicodeString& srcText,
4369                                        int32_t srcStart,
4370                                        int32_t srcLength) const
4371 {
4372   if(srcText.isBogus()) {
4373     return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4374   } else {
4375     srcText.pinIndices(srcStart, srcLength);
4376     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4377   }
4378 }
4379 
4380 inline int8_t
compareCodePointOrder(const UnicodeString & text)4381 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4382 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4383 
4384 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText)4385 UnicodeString::compareCodePointOrder(int32_t start,
4386                                      int32_t _length,
4387                                      const UnicodeString& srcText) const
4388 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4389 
4390 inline int8_t
compareCodePointOrder(ConstChar16Ptr srcChars,int32_t srcLength)4391 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4392                                      int32_t srcLength) const
4393 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4394 
4395 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4396 UnicodeString::compareCodePointOrder(int32_t start,
4397                                      int32_t _length,
4398                                      const UnicodeString& srcText,
4399                                      int32_t srcStart,
4400                                      int32_t srcLength) const
4401 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4402 
4403 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars)4404 UnicodeString::compareCodePointOrder(int32_t start,
4405                                      int32_t _length,
4406                                      const char16_t *srcChars) const
4407 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4408 
4409 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4410 UnicodeString::compareCodePointOrder(int32_t start,
4411                                      int32_t _length,
4412                                      const char16_t *srcChars,
4413                                      int32_t srcStart,
4414                                      int32_t srcLength) const
4415 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4416 
4417 inline int8_t
compareCodePointOrderBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4418 UnicodeString::compareCodePointOrderBetween(int32_t start,
4419                                             int32_t limit,
4420                                             const UnicodeString& srcText,
4421                                             int32_t srcStart,
4422                                             int32_t srcLimit) const
4423 { return doCompareCodePointOrder(start, limit - start,
4424            srcText, srcStart, srcLimit - srcStart); }
4425 
4426 inline int8_t
doCaseCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4427 UnicodeString::doCaseCompare(int32_t start,
4428                              int32_t thisLength,
4429                              const UnicodeString &srcText,
4430                              int32_t srcStart,
4431                              int32_t srcLength,
4432                              uint32_t options) const
4433 {
4434   if(srcText.isBogus()) {
4435     return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4436   } else {
4437     srcText.pinIndices(srcStart, srcLength);
4438     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4439   }
4440 }
4441 
4442 inline int8_t
caseCompare(const UnicodeString & text,uint32_t options)4443 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4444   return doCaseCompare(0, length(), text, 0, text.length(), options);
4445 }
4446 
4447 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,uint32_t options)4448 UnicodeString::caseCompare(int32_t start,
4449                            int32_t _length,
4450                            const UnicodeString &srcText,
4451                            uint32_t options) const {
4452   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4453 }
4454 
4455 inline int8_t
caseCompare(ConstChar16Ptr srcChars,int32_t srcLength,uint32_t options)4456 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4457                            int32_t srcLength,
4458                            uint32_t options) const {
4459   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4460 }
4461 
4462 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4463 UnicodeString::caseCompare(int32_t start,
4464                            int32_t _length,
4465                            const UnicodeString &srcText,
4466                            int32_t srcStart,
4467                            int32_t srcLength,
4468                            uint32_t options) const {
4469   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4470 }
4471 
4472 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,uint32_t options)4473 UnicodeString::caseCompare(int32_t start,
4474                            int32_t _length,
4475                            const char16_t *srcChars,
4476                            uint32_t options) const {
4477   return doCaseCompare(start, _length, srcChars, 0, _length, options);
4478 }
4479 
4480 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options)4481 UnicodeString::caseCompare(int32_t start,
4482                            int32_t _length,
4483                            const char16_t *srcChars,
4484                            int32_t srcStart,
4485                            int32_t srcLength,
4486                            uint32_t options) const {
4487   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4488 }
4489 
4490 inline int8_t
caseCompareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit,uint32_t options)4491 UnicodeString::caseCompareBetween(int32_t start,
4492                                   int32_t limit,
4493                                   const UnicodeString &srcText,
4494                                   int32_t srcStart,
4495                                   int32_t srcLimit,
4496                                   uint32_t options) const {
4497   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4498 }
4499 
4500 inline int32_t
indexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4501 UnicodeString::indexOf(const UnicodeString& srcText,
4502                int32_t srcStart,
4503                int32_t srcLength,
4504                int32_t start,
4505                int32_t _length) const
4506 {
4507   if(!srcText.isBogus()) {
4508     srcText.pinIndices(srcStart, srcLength);
4509     if(srcLength > 0) {
4510       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4511     }
4512   }
4513   return -1;
4514 }
4515 
4516 inline int32_t
indexOf(const UnicodeString & text)4517 UnicodeString::indexOf(const UnicodeString& text) const
4518 { return indexOf(text, 0, text.length(), 0, length()); }
4519 
4520 inline int32_t
indexOf(const UnicodeString & text,int32_t start)4521 UnicodeString::indexOf(const UnicodeString& text,
4522                int32_t start) const {
4523   pinIndex(start);
4524   return indexOf(text, 0, text.length(), start, length() - start);
4525 }
4526 
4527 inline int32_t
indexOf(const UnicodeString & text,int32_t start,int32_t _length)4528 UnicodeString::indexOf(const UnicodeString& text,
4529                int32_t start,
4530                int32_t _length) const
4531 { return indexOf(text, 0, text.length(), start, _length); }
4532 
4533 inline int32_t
indexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4534 UnicodeString::indexOf(const char16_t *srcChars,
4535                int32_t srcLength,
4536                int32_t start) const {
4537   pinIndex(start);
4538   return indexOf(srcChars, 0, srcLength, start, length() - start);
4539 }
4540 
4541 inline int32_t
indexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4542 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4543                int32_t srcLength,
4544                int32_t start,
4545                int32_t _length) const
4546 { return indexOf(srcChars, 0, srcLength, start, _length); }
4547 
4548 inline int32_t
indexOf(char16_t c,int32_t start,int32_t _length)4549 UnicodeString::indexOf(char16_t c,
4550                int32_t start,
4551                int32_t _length) const
4552 { return doIndexOf(c, start, _length); }
4553 
4554 inline int32_t
indexOf(UChar32 c,int32_t start,int32_t _length)4555 UnicodeString::indexOf(UChar32 c,
4556                int32_t start,
4557                int32_t _length) const
4558 { return doIndexOf(c, start, _length); }
4559 
4560 inline int32_t
indexOf(char16_t c)4561 UnicodeString::indexOf(char16_t c) const
4562 { return doIndexOf(c, 0, length()); }
4563 
4564 inline int32_t
indexOf(UChar32 c)4565 UnicodeString::indexOf(UChar32 c) const
4566 { return indexOf(c, 0, length()); }
4567 
4568 inline int32_t
indexOf(char16_t c,int32_t start)4569 UnicodeString::indexOf(char16_t c,
4570                int32_t start) const {
4571   pinIndex(start);
4572   return doIndexOf(c, start, length() - start);
4573 }
4574 
4575 inline int32_t
indexOf(UChar32 c,int32_t start)4576 UnicodeString::indexOf(UChar32 c,
4577                int32_t start) const {
4578   pinIndex(start);
4579   return indexOf(c, start, length() - start);
4580 }
4581 
4582 inline int32_t
lastIndexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4583 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4584                int32_t srcLength,
4585                int32_t start,
4586                int32_t _length) const
4587 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4588 
4589 inline int32_t
lastIndexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4590 UnicodeString::lastIndexOf(const char16_t *srcChars,
4591                int32_t srcLength,
4592                int32_t start) const {
4593   pinIndex(start);
4594   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4595 }
4596 
4597 inline int32_t
lastIndexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4598 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4599                int32_t srcStart,
4600                int32_t srcLength,
4601                int32_t start,
4602                int32_t _length) const
4603 {
4604   if(!srcText.isBogus()) {
4605     srcText.pinIndices(srcStart, srcLength);
4606     if(srcLength > 0) {
4607       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4608     }
4609   }
4610   return -1;
4611 }
4612 
4613 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start,int32_t _length)4614 UnicodeString::lastIndexOf(const UnicodeString& text,
4615                int32_t start,
4616                int32_t _length) const
4617 { return lastIndexOf(text, 0, text.length(), start, _length); }
4618 
4619 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start)4620 UnicodeString::lastIndexOf(const UnicodeString& text,
4621                int32_t start) const {
4622   pinIndex(start);
4623   return lastIndexOf(text, 0, text.length(), start, length() - start);
4624 }
4625 
4626 inline int32_t
lastIndexOf(const UnicodeString & text)4627 UnicodeString::lastIndexOf(const UnicodeString& text) const
4628 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4629 
4630 inline int32_t
lastIndexOf(char16_t c,int32_t start,int32_t _length)4631 UnicodeString::lastIndexOf(char16_t c,
4632                int32_t start,
4633                int32_t _length) const
4634 { return doLastIndexOf(c, start, _length); }
4635 
4636 inline int32_t
lastIndexOf(UChar32 c,int32_t start,int32_t _length)4637 UnicodeString::lastIndexOf(UChar32 c,
4638                int32_t start,
4639                int32_t _length) const {
4640   return doLastIndexOf(c, start, _length);
4641 }
4642 
4643 inline int32_t
lastIndexOf(char16_t c)4644 UnicodeString::lastIndexOf(char16_t c) const
4645 { return doLastIndexOf(c, 0, length()); }
4646 
4647 inline int32_t
lastIndexOf(UChar32 c)4648 UnicodeString::lastIndexOf(UChar32 c) const {
4649   return lastIndexOf(c, 0, length());
4650 }
4651 
4652 inline int32_t
lastIndexOf(char16_t c,int32_t start)4653 UnicodeString::lastIndexOf(char16_t c,
4654                int32_t start) const {
4655   pinIndex(start);
4656   return doLastIndexOf(c, start, length() - start);
4657 }
4658 
4659 inline int32_t
lastIndexOf(UChar32 c,int32_t start)4660 UnicodeString::lastIndexOf(UChar32 c,
4661                int32_t start) const {
4662   pinIndex(start);
4663   return lastIndexOf(c, start, length() - start);
4664 }
4665 
4666 inline UBool
startsWith(const UnicodeString & text)4667 UnicodeString::startsWith(const UnicodeString& text) const
4668 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4669 
4670 inline UBool
startsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4671 UnicodeString::startsWith(const UnicodeString& srcText,
4672               int32_t srcStart,
4673               int32_t srcLength) const
4674 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4675 
4676 inline UBool
startsWith(ConstChar16Ptr srcChars,int32_t srcLength)4677 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4678   if(srcLength < 0) {
4679     srcLength = u_strlen(toUCharPtr(srcChars));
4680   }
4681   return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4682 }
4683 
4684 inline UBool
startsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4685 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4686   if(srcLength < 0) {
4687     srcLength = u_strlen(toUCharPtr(srcChars));
4688   }
4689   return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4690 }
4691 
4692 inline UBool
endsWith(const UnicodeString & text)4693 UnicodeString::endsWith(const UnicodeString& text) const
4694 { return doEqualsSubstring(length() - text.length(), text.length(),
4695            text, 0, text.length()); }
4696 
4697 inline UBool
endsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4698 UnicodeString::endsWith(const UnicodeString& srcText,
4699             int32_t srcStart,
4700             int32_t srcLength) const {
4701   srcText.pinIndices(srcStart, srcLength);
4702   return doEqualsSubstring(length() - srcLength, srcLength,
4703                    srcText, srcStart, srcLength);
4704 }
4705 
4706 inline UBool
endsWith(ConstChar16Ptr srcChars,int32_t srcLength)4707 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4708             int32_t srcLength) const {
4709   if(srcLength < 0) {
4710     srcLength = u_strlen(toUCharPtr(srcChars));
4711   }
4712   return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4713 }
4714 
4715 inline UBool
endsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4716 UnicodeString::endsWith(const char16_t *srcChars,
4717             int32_t srcStart,
4718             int32_t srcLength) const {
4719   if(srcLength < 0) {
4720     srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4721   }
4722   return doEqualsSubstring(length() - srcLength, srcLength,
4723                    srcChars, srcStart, srcLength);
4724 }
4725 
4726 //========================================
4727 // replace
4728 //========================================
4729 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText)4730 UnicodeString::replace(int32_t start,
4731                int32_t _length,
4732                const UnicodeString& srcText)
4733 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4734 
4735 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4736 UnicodeString::replace(int32_t start,
4737                int32_t _length,
4738                const UnicodeString& srcText,
4739                int32_t srcStart,
4740                int32_t srcLength)
4741 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4742 
4743 inline UnicodeString&
replace(int32_t start,int32_t _length,ConstChar16Ptr srcChars,int32_t srcLength)4744 UnicodeString::replace(int32_t start,
4745                int32_t _length,
4746                ConstChar16Ptr srcChars,
4747                int32_t srcLength)
4748 { return doReplace(start, _length, srcChars, 0, srcLength); }
4749 
4750 inline UnicodeString&
replace(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4751 UnicodeString::replace(int32_t start,
4752                int32_t _length,
4753                const char16_t *srcChars,
4754                int32_t srcStart,
4755                int32_t srcLength)
4756 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4757 
4758 inline UnicodeString&
replace(int32_t start,int32_t _length,char16_t srcChar)4759 UnicodeString::replace(int32_t start,
4760                int32_t _length,
4761                char16_t srcChar)
4762 { return doReplace(start, _length, &srcChar, 0, 1); }
4763 
4764 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText)4765 UnicodeString::replaceBetween(int32_t start,
4766                   int32_t limit,
4767                   const UnicodeString& srcText)
4768 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4769 
4770 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4771 UnicodeString::replaceBetween(int32_t start,
4772                   int32_t limit,
4773                   const UnicodeString& srcText,
4774                   int32_t srcStart,
4775                   int32_t srcLimit)
4776 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4777 
4778 inline UnicodeString&
findAndReplace(const UnicodeString & oldText,const UnicodeString & newText)4779 UnicodeString::findAndReplace(const UnicodeString& oldText,
4780                   const UnicodeString& newText)
4781 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4782             newText, 0, newText.length()); }
4783 
4784 inline UnicodeString&
findAndReplace(int32_t start,int32_t _length,const UnicodeString & oldText,const UnicodeString & newText)4785 UnicodeString::findAndReplace(int32_t start,
4786                   int32_t _length,
4787                   const UnicodeString& oldText,
4788                   const UnicodeString& newText)
4789 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4790             newText, 0, newText.length()); }
4791 
4792 // ============================
4793 // extract
4794 // ============================
4795 inline void
doExtract(int32_t start,int32_t _length,UnicodeString & target)4796 UnicodeString::doExtract(int32_t start,
4797              int32_t _length,
4798              UnicodeString& target) const
4799 { target.replace(0, target.length(), *this, start, _length); }
4800 
4801 inline void
extract(int32_t start,int32_t _length,Char16Ptr target,int32_t targetStart)4802 UnicodeString::extract(int32_t start,
4803                int32_t _length,
4804                Char16Ptr target,
4805                int32_t targetStart) const
4806 { doExtract(start, _length, target, targetStart); }
4807 
4808 inline void
extract(int32_t start,int32_t _length,UnicodeString & target)4809 UnicodeString::extract(int32_t start,
4810                int32_t _length,
4811                UnicodeString& target) const
4812 { doExtract(start, _length, target); }
4813 
4814 #if !UCONFIG_NO_CONVERSION
4815 
4816 inline int32_t
extract(int32_t start,int32_t _length,char * dst,const char * codepage)4817 UnicodeString::extract(int32_t start,
4818                int32_t _length,
4819                char *dst,
4820                const char *codepage) const
4821 
4822 {
4823   // This dstSize value will be checked explicitly
4824   return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4825 }
4826 
4827 #endif
4828 
4829 inline void
extractBetween(int32_t start,int32_t limit,char16_t * dst,int32_t dstStart)4830 UnicodeString::extractBetween(int32_t start,
4831                   int32_t limit,
4832                   char16_t *dst,
4833                   int32_t dstStart) const {
4834   pinIndex(start);
4835   pinIndex(limit);
4836   doExtract(start, limit - start, dst, dstStart);
4837 }
4838 
4839 inline UnicodeString
tempSubStringBetween(int32_t start,int32_t limit)4840 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4841     return tempSubString(start, limit - start);
4842 }
4843 
4844 inline char16_t
doCharAt(int32_t offset)4845 UnicodeString::doCharAt(int32_t offset) const
4846 {
4847   if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4848     return getArrayStart()[offset];
4849   } else {
4850     return kInvalidUChar;
4851   }
4852 }
4853 
4854 inline char16_t
charAt(int32_t offset)4855 UnicodeString::charAt(int32_t offset) const
4856 { return doCharAt(offset); }
4857 
4858 inline char16_t
4859 UnicodeString::operator[] (int32_t offset) const
4860 { return doCharAt(offset); }
4861 
4862 inline UBool
isEmpty()4863 UnicodeString::isEmpty() const {
4864   // Arithmetic or logical right shift does not matter: only testing for 0.
4865   return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4866 }
4867 
4868 //========================================
4869 // Write implementation methods
4870 //========================================
4871 inline void
setZeroLength()4872 UnicodeString::setZeroLength() {
4873   fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4874 }
4875 
4876 inline void
setShortLength(int32_t len)4877 UnicodeString::setShortLength(int32_t len) {
4878   // requires 0 <= len <= kMaxShortLength
4879   fUnion.fFields.fLengthAndFlags =
4880     static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4881 }
4882 
4883 inline void
setLength(int32_t len)4884 UnicodeString::setLength(int32_t len) {
4885   if(len <= kMaxShortLength) {
4886     setShortLength(len);
4887   } else {
4888     fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4889     fUnion.fFields.fLength = len;
4890   }
4891 }
4892 
4893 inline void
setToEmpty()4894 UnicodeString::setToEmpty() {
4895   fUnion.fFields.fLengthAndFlags = kShortString;
4896 }
4897 
4898 inline void
setArray(char16_t * array,int32_t len,int32_t capacity)4899 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4900   setLength(len);
4901   fUnion.fFields.fArray = array;
4902   fUnion.fFields.fCapacity = capacity;
4903 }
4904 
4905 inline UnicodeString&
4906 UnicodeString::operator= (char16_t ch)
4907 { return doReplace(0, length(), &ch, 0, 1); }
4908 
4909 inline UnicodeString&
4910 UnicodeString::operator= (UChar32 ch)
4911 { return replace(0, length(), ch); }
4912 
4913 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4914 UnicodeString::setTo(const UnicodeString& srcText,
4915              int32_t srcStart,
4916              int32_t srcLength)
4917 {
4918   unBogus();
4919   return doReplace(0, length(), srcText, srcStart, srcLength);
4920 }
4921 
4922 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart)4923 UnicodeString::setTo(const UnicodeString& srcText,
4924              int32_t srcStart)
4925 {
4926   unBogus();
4927   srcText.pinIndex(srcStart);
4928   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4929 }
4930 
4931 inline UnicodeString&
setTo(const UnicodeString & srcText)4932 UnicodeString::setTo(const UnicodeString& srcText)
4933 {
4934   return copyFrom(srcText);
4935 }
4936 
4937 inline UnicodeString&
setTo(const char16_t * srcChars,int32_t srcLength)4938 UnicodeString::setTo(const char16_t *srcChars,
4939              int32_t srcLength)
4940 {
4941   unBogus();
4942   return doReplace(0, length(), srcChars, 0, srcLength);
4943 }
4944 
4945 inline UnicodeString&
setTo(char16_t srcChar)4946 UnicodeString::setTo(char16_t srcChar)
4947 {
4948   unBogus();
4949   return doReplace(0, length(), &srcChar, 0, 1);
4950 }
4951 
4952 inline UnicodeString&
setTo(UChar32 srcChar)4953 UnicodeString::setTo(UChar32 srcChar)
4954 {
4955   unBogus();
4956   return replace(0, length(), srcChar);
4957 }
4958 
4959 inline UnicodeString&
append(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4960 UnicodeString::append(const UnicodeString& srcText,
4961               int32_t srcStart,
4962               int32_t srcLength)
4963 { return doAppend(srcText, srcStart, srcLength); }
4964 
4965 inline UnicodeString&
append(const UnicodeString & srcText)4966 UnicodeString::append(const UnicodeString& srcText)
4967 { return doAppend(srcText, 0, srcText.length()); }
4968 
4969 inline UnicodeString&
append(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4970 UnicodeString::append(const char16_t *srcChars,
4971               int32_t srcStart,
4972               int32_t srcLength)
4973 { return doAppend(srcChars, srcStart, srcLength); }
4974 
4975 inline UnicodeString&
append(ConstChar16Ptr srcChars,int32_t srcLength)4976 UnicodeString::append(ConstChar16Ptr srcChars,
4977               int32_t srcLength)
4978 { return doAppend(srcChars, 0, srcLength); }
4979 
4980 inline UnicodeString&
append(char16_t srcChar)4981 UnicodeString::append(char16_t srcChar)
4982 { return doAppend(&srcChar, 0, 1); }
4983 
4984 inline UnicodeString&
4985 UnicodeString::operator+= (char16_t ch)
4986 { return doAppend(&ch, 0, 1); }
4987 
4988 inline UnicodeString&
4989 UnicodeString::operator+= (UChar32 ch) {
4990   return append(ch);
4991 }
4992 
4993 inline UnicodeString&
4994 UnicodeString::operator+= (const UnicodeString& srcText)
4995 { return doAppend(srcText, 0, srcText.length()); }
4996 
4997 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4998 UnicodeString::insert(int32_t start,
4999               const UnicodeString& srcText,
5000               int32_t srcStart,
5001               int32_t srcLength)
5002 { return doReplace(start, 0, srcText, srcStart, srcLength); }
5003 
5004 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText)5005 UnicodeString::insert(int32_t start,
5006               const UnicodeString& srcText)
5007 { return doReplace(start, 0, srcText, 0, srcText.length()); }
5008 
5009 inline UnicodeString&
insert(int32_t start,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)5010 UnicodeString::insert(int32_t start,
5011               const char16_t *srcChars,
5012               int32_t srcStart,
5013               int32_t srcLength)
5014 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
5015 
5016 inline UnicodeString&
insert(int32_t start,ConstChar16Ptr srcChars,int32_t srcLength)5017 UnicodeString::insert(int32_t start,
5018               ConstChar16Ptr srcChars,
5019               int32_t srcLength)
5020 { return doReplace(start, 0, srcChars, 0, srcLength); }
5021 
5022 inline UnicodeString&
insert(int32_t start,char16_t srcChar)5023 UnicodeString::insert(int32_t start,
5024               char16_t srcChar)
5025 { return doReplace(start, 0, &srcChar, 0, 1); }
5026 
5027 inline UnicodeString&
insert(int32_t start,UChar32 srcChar)5028 UnicodeString::insert(int32_t start,
5029               UChar32 srcChar)
5030 { return replace(start, 0, srcChar); }
5031 
5032 
5033 inline UnicodeString&
remove()5034 UnicodeString::remove()
5035 {
5036   // remove() of a bogus string makes the string empty and non-bogus
5037   if(isBogus()) {
5038     setToEmpty();
5039   } else {
5040     setZeroLength();
5041   }
5042   return *this;
5043 }
5044 
5045 inline UnicodeString&
remove(int32_t start,int32_t _length)5046 UnicodeString::remove(int32_t start,
5047              int32_t _length)
5048 {
5049     if(start <= 0 && _length == INT32_MAX) {
5050         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5051         return remove();
5052     }
5053     return doReplace(start, _length, nullptr, 0, 0);
5054 }
5055 
5056 inline UnicodeString&
removeBetween(int32_t start,int32_t limit)5057 UnicodeString::removeBetween(int32_t start,
5058                 int32_t limit)
5059 { return doReplace(start, limit - start, nullptr, 0, 0); }
5060 
5061 inline UnicodeString &
retainBetween(int32_t start,int32_t limit)5062 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5063   truncate(limit);
5064   return doReplace(0, start, nullptr, 0, 0);
5065 }
5066 
5067 inline UBool
truncate(int32_t targetLength)5068 UnicodeString::truncate(int32_t targetLength)
5069 {
5070   if(isBogus() && targetLength == 0) {
5071     // truncate(0) of a bogus string makes the string empty and non-bogus
5072     unBogus();
5073     return false;
5074   } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5075     setLength(targetLength);
5076     return true;
5077   } else {
5078     return false;
5079   }
5080 }
5081 
5082 inline UnicodeString&
reverse()5083 UnicodeString::reverse()
5084 { return doReverse(0, length()); }
5085 
5086 inline UnicodeString&
reverse(int32_t start,int32_t _length)5087 UnicodeString::reverse(int32_t start,
5088                int32_t _length)
5089 { return doReverse(start, _length); }
5090 
5091 U_NAMESPACE_END
5092 
5093 #endif /* U_SHOW_CPLUSPLUS_API */
5094 
5095 #endif
5096