• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1998-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   09/25/98    stephen     Creation.
15 *   11/11/98    stephen     Changed per 11/9 code review.
16 *   04/20/99    stephen     Overhauled per 4/16 code review.
17 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
18 *                           handleReplaceBetween(); other methods unchanged.
19 *   06/25/01    grhoten     Remove dependency on iostream.
20 ******************************************************************************
21 */
22 
23 #ifndef UNISTR_H
24 #define UNISTR_H
25 
26 /**
27  * \file
28  * \brief C++ API: Unicode String
29  */
30 
31 #include <cstddef>
32 #include "unicode/utypes.h"
33 #include "unicode/char16ptr.h"
34 #include "unicode/rep.h"
35 #include "unicode/std_string.h"
36 #include "unicode/stringpiece.h"
37 #include "unicode/bytestream.h"
38 
39 struct UConverter;          // unicode/ucnv.h
40 
41 #ifndef USTRING_H
42 /**
43  * \ingroup ustring_ustrlen
44  */
45 U_STABLE int32_t U_EXPORT2
46 u_strlen(const UChar *s);
47 #endif
48 
49 U_NAMESPACE_BEGIN
50 
51 #if !UCONFIG_NO_BREAK_ITERATION
52 class BreakIterator;        // unicode/brkiter.h
53 #endif
54 class Edits;
55 
56 U_NAMESPACE_END
57 
58 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
59 /**
60  * Internal string case mapping function type.
61  * All error checking must be done.
62  * src and dest must not overlap.
63  * @internal
64  */
65 typedef int32_t U_CALLCONV
66 UStringCaseMapper(int32_t caseLocale, uint32_t options,
67 #if !UCONFIG_NO_BREAK_ITERATION
68                   icu::BreakIterator *iter,
69 #endif
70                   char16_t *dest, int32_t destCapacity,
71                   const char16_t *src, int32_t srcLength,
72                   icu::Edits *edits,
73                   UErrorCode &errorCode);
74 
75 U_NAMESPACE_BEGIN
76 
77 class Locale;               // unicode/locid.h
78 class StringCharacterIterator;
79 class UnicodeStringAppendable;  // unicode/appendable.h
80 
81 /* The <iostream> include has been moved to unicode/ustream.h */
82 
83 /**
84  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
85  * which constructs a Unicode string from an invariant-character char * string.
86  * About invariant characters see utypes.h.
87  * This constructor has no runtime dependency on conversion code and is
88  * therefore recommended over ones taking a charset name string
89  * (where the empty string "" indicates invariant-character conversion).
90  *
91  * @stable ICU 3.2
92  */
93 #define US_INV icu::UnicodeString::kInvariant
94 
95 /**
96  * Unicode String literals in C++.
97  *
98  * Note: these macros are not recommended for new code.
99  * Prior to the availability of C++11 and u"unicode string literals",
100  * these macros were provided for portability and efficiency when
101  * initializing UnicodeStrings from literals.
102  *
103  * They work only for strings that contain "invariant characters", i.e.,
104  * only Latin letters, digits, and some punctuation.
105  * See utypes.h for details.
106  *
107  * The string parameter must be a C string literal.
108  * The length of the string, not including the terminating
109  * <code>NUL</code>, must be specified as a constant.
110  * @stable ICU 2.0
111  */
112 #if !U_CHAR16_IS_TYPEDEF
113 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
114 #else
115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
116 #endif
117 
118 /**
119  * Unicode String literals in C++.
120  * Depending on the platform properties, different UnicodeString
121  * constructors should be used to create a UnicodeString object from
122  * a string literal.
123  * The macros are defined for improved performance.
124  * They work only for strings that contain "invariant characters", i.e.,
125  * only Latin letters, digits, and some punctuation.
126  * See utypes.h for details.
127  *
128  * The string parameter must be a C string literal.
129  * @stable ICU 2.0
130  */
131 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
132 
133 /**
134  * \def UNISTR_FROM_CHAR_EXPLICIT
135  * This can be defined to be empty or "explicit".
136  * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
137  * constructors are marked as explicit, preventing their inadvertent use.
138  * @stable ICU 49
139  */
140 #ifndef UNISTR_FROM_CHAR_EXPLICIT
141 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
142     // Auto-"explicit" in ICU library code.
143 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
144 # else
145     // Empty by default for source code compatibility.
146 #   define UNISTR_FROM_CHAR_EXPLICIT
147 # endif
148 #endif
149 
150 /**
151  * \def UNISTR_FROM_STRING_EXPLICIT
152  * This can be defined to be empty or "explicit".
153  * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
154  * constructors are marked as explicit, preventing their inadvertent use.
155  *
156  * In particular, this helps prevent accidentally depending on ICU conversion code
157  * by passing a string literal into an API with a const UnicodeString & parameter.
158  * @stable ICU 49
159  */
160 #ifndef UNISTR_FROM_STRING_EXPLICIT
161 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
162     // Auto-"explicit" in ICU library code.
163 #   define UNISTR_FROM_STRING_EXPLICIT explicit
164 # else
165     // Empty by default for source code compatibility.
166 #   define UNISTR_FROM_STRING_EXPLICIT
167 # endif
168 #endif
169 
170 /**
171  * \def UNISTR_OBJECT_SIZE
172  * Desired sizeof(UnicodeString) in bytes.
173  * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
174  * The object size may want to be a multiple of 16 bytes,
175  * which is a common granularity for heap allocation.
176  *
177  * Any space inside the object beyond sizeof(vtable pointer) + 2
178  * is available for storing short strings inside the object.
179  * The bigger the object, the longer a string that can be stored inside the object,
180  * without additional heap allocation.
181  *
182  * Depending on a platform's pointer size, pointer alignment requirements,
183  * and struct padding, the compiler will usually round up sizeof(UnicodeString)
184  * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
185  * to hold the fields for heap-allocated strings.
186  * Such a minimum size also ensures that the object is easily large enough
187  * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
188  *
189  * sizeof(UnicodeString) >= 48 should work for all known platforms.
190  *
191  * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
192  * sizeof(UnicodeString) = 64 would leave space for
193  * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
194  * char16_ts stored inside the object.
195  *
196  * The minimum object size on a 64-bit machine would be
197  * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
198  * and the internal buffer would hold up to 11 char16_ts in that case.
199  *
200  * @see U16_MAX_LENGTH
201  * @stable ICU 56
202  */
203 #ifndef UNISTR_OBJECT_SIZE
204 # define UNISTR_OBJECT_SIZE 64
205 #endif
206 
207 /**
208  * UnicodeString is a string class that stores Unicode characters directly and provides
209  * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
210  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
211  *
212  * A UnicodeString may also "alias" an external array of characters
213  * (that is, point to it, rather than own the array)
214  * whose lifetime must then at least match the lifetime of the aliasing object.
215  * This aliasing may be preserved when returning a UnicodeString by value,
216  * depending on the compiler and the function implementation,
217  * via Return Value Optimization (RVO) or the move assignment operator.
218  * (However, the copy assignment operator does not preserve aliasing.)
219  * For details see the description of storage models at the end of the class API docs
220  * and in the User Guide chapter linked from there.
221  *
222  * The UnicodeString class is not suitable for subclassing.
223  *
224  * <p>For an overview of Unicode strings in C and C++ see the
225  * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
226  *
227  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
228  * A Unicode character may be stored with either one code unit
229  * (the most common case) or with a matched pair of special code units
230  * ("surrogates"). The data type for code units is char16_t.
231  * For single-character handling, a Unicode character code <em>point</em> is a value
232  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
233  *
234  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
235  * This is the same as with multi-byte char* strings in traditional string handling.
236  * Operations on partial strings typically do not test for code point boundaries.
237  * If necessary, the user needs to take care of such boundaries by testing for the code unit
238  * values or by using functions like
239  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
240  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
241  *
242  * UnicodeString methods are more lenient with regard to input parameter values
243  * than other ICU APIs. In particular:
244  * - If indexes are out of bounds for a UnicodeString object
245  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
246  * - If the buffer passed to an insert/append/replace operation is owned by the
247  *   target object, e.g., calling str.append(str), an extra copy may take place
248  *   to ensure safety.
249  * - If primitive string pointer values (e.g., const char16_t * or char *)
250  *   for input strings are NULL, then those input string parameters are treated
251  *   as if they pointed to an empty string.
252  *   However, this is <em>not</em> the case for char * parameters for charset names
253  *   or other IDs.
254  * - Most UnicodeString methods do not take a UErrorCode parameter because
255  *   there are usually very few opportunities for failure other than a shortage
256  *   of memory, error codes in low-level C++ string methods would be inconvenient,
257  *   and the error code as the last parameter (ICU convention) would prevent
258  *   the use of default parameter values.
259  *   Instead, such methods set the UnicodeString into a "bogus" state
260  *   (see isBogus()) if an error occurs.
261  *
262  * In string comparisons, two UnicodeString objects that are both "bogus"
263  * compare equal (to be transitive and prevent endless loops in sorting),
264  * and a "bogus" string compares less than any non-"bogus" one.
265  *
266  * Const UnicodeString methods are thread-safe. Multiple threads can use
267  * const methods on the same UnicodeString object simultaneously,
268  * but non-const methods must not be called concurrently (in multiple threads)
269  * with any other (const or non-const) methods.
270  *
271  * Similarly, const UnicodeString & parameters are thread-safe.
272  * One object may be passed in as such a parameter concurrently in multiple threads.
273  * This includes the const UnicodeString & parameters for
274  * copy construction, assignment, and cloning.
275  *
276  * <p>UnicodeString uses several storage methods.
277  * String contents can be stored inside the UnicodeString object itself,
278  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
279  * Most of this is done transparently, but careful aliasing in particular provides
280  * significant performance improvements.
281  * Also, the internal buffer is accessible via special functions.
282  * For details see the
283  * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
284  *
285  * @see utf.h
286  * @see CharacterIterator
287  * @stable ICU 2.0
288  */
289 class U_COMMON_API UnicodeString : public Replaceable
290 {
291 public:
292 
293   /**
294    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
295    * which constructs a Unicode string from an invariant-character char * string.
296    * Use the macro US_INV instead of the full qualification for this value.
297    *
298    * @see US_INV
299    * @stable ICU 3.2
300    */
301   enum EInvariant {
302     /**
303      * @see EInvariant
304      * @stable ICU 3.2
305      */
306     kInvariant
307   };
308 
309   //========================================
310   // Read-only operations
311   //========================================
312 
313   /* Comparison - bitwise only - for international comparison use collation */
314 
315   /**
316    * Equality operator. Performs only bitwise comparison.
317    * @param text The UnicodeString to compare to this one.
318    * @return TRUE if <TT>text</TT> contains the same characters as this one,
319    * FALSE otherwise.
320    * @stable ICU 2.0
321    */
322   inline UBool operator== (const UnicodeString& text) const;
323 
324   /**
325    * Inequality operator. Performs only bitwise comparison.
326    * @param text The UnicodeString to compare to this one.
327    * @return FALSE if <TT>text</TT> contains the same characters as this one,
328    * TRUE otherwise.
329    * @stable ICU 2.0
330    */
331   inline UBool operator!= (const UnicodeString& text) const;
332 
333   /**
334    * Greater than operator. Performs only bitwise comparison.
335    * @param text The UnicodeString to compare to this one.
336    * @return TRUE if the characters in this are bitwise
337    * greater than the characters in <code>text</code>, FALSE otherwise
338    * @stable ICU 2.0
339    */
340   inline UBool operator> (const UnicodeString& text) const;
341 
342   /**
343    * Less than operator. Performs only bitwise comparison.
344    * @param text The UnicodeString to compare to this one.
345    * @return TRUE if the characters in this are bitwise
346    * less than the characters in <code>text</code>, FALSE otherwise
347    * @stable ICU 2.0
348    */
349   inline UBool operator< (const UnicodeString& text) const;
350 
351   /**
352    * Greater than or equal operator. Performs only bitwise comparison.
353    * @param text The UnicodeString to compare to this one.
354    * @return TRUE if the characters in this are bitwise
355    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
356    * @stable ICU 2.0
357    */
358   inline UBool operator>= (const UnicodeString& text) const;
359 
360   /**
361    * Less than or equal operator. Performs only bitwise comparison.
362    * @param text The UnicodeString to compare to this one.
363    * @return TRUE if the characters in this are bitwise
364    * less than or equal to the characters in <code>text</code>, FALSE otherwise
365    * @stable ICU 2.0
366    */
367   inline UBool operator<= (const UnicodeString& text) const;
368 
369   /**
370    * Compare the characters bitwise in this UnicodeString to
371    * the characters in <code>text</code>.
372    * @param text The UnicodeString to compare to this one.
373    * @return The result of bitwise character comparison: 0 if this
374    * contains the same characters as <code>text</code>, -1 if the characters in
375    * this are bitwise less than the characters in <code>text</code>, +1 if the
376    * characters in this are bitwise greater than the characters
377    * in <code>text</code>.
378    * @stable ICU 2.0
379    */
380   inline int8_t compare(const UnicodeString& text) const;
381 
382   /**
383    * Compare the characters bitwise in the range
384    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
385    * in the <b>entire string</b> <TT>text</TT>.
386    * (The parameters "start" and "length" are not applied to the other text "text".)
387    * @param start the offset at which the compare operation begins
388    * @param length the number of characters in this to compare.
389    * @param text the other text to be compared against this string.
390    * @return The result of bitwise character comparison: 0 if this
391    * contains the same characters as <code>text</code>, -1 if the characters in
392    * this are bitwise less than the characters in <code>text</code>, +1 if the
393    * characters in this are bitwise greater than the characters
394    * in <code>text</code>.
395    * @stable ICU 2.0
396    */
397   inline int8_t compare(int32_t start,
398          int32_t length,
399          const UnicodeString& text) const;
400 
401   /**
402    * Compare the characters bitwise in the range
403    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
404    * in <TT>srcText</TT> in the range
405    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
406    * @param start the offset at which the compare operation begins
407    * @param length the number of characters in this to compare.
408    * @param srcText the text to be compared
409    * @param srcStart the offset into <TT>srcText</TT> to start comparison
410    * @param srcLength the number of characters in <TT>srcText</TT> to compare
411    * @return The result of bitwise character comparison: 0 if this
412    * contains the same characters as <code>srcText</code>, -1 if the characters in
413    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
414    * characters in this are bitwise greater than the characters
415    * in <code>srcText</code>.
416    * @stable ICU 2.0
417    */
418    inline int8_t compare(int32_t start,
419          int32_t length,
420          const UnicodeString& srcText,
421          int32_t srcStart,
422          int32_t srcLength) const;
423 
424   /**
425    * Compare the characters bitwise in this UnicodeString with the first
426    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
427    * @param srcChars The characters to compare to this UnicodeString.
428    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
429    * @return The result of bitwise character comparison: 0 if this
430    * contains the same characters as <code>srcChars</code>, -1 if the characters in
431    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
432    * characters in this are bitwise greater than the characters
433    * in <code>srcChars</code>.
434    * @stable ICU 2.0
435    */
436   inline int8_t compare(ConstChar16Ptr srcChars,
437          int32_t srcLength) const;
438 
439   /**
440    * Compare the characters bitwise in the range
441    * [<TT>start</TT>, <TT>start + length</TT>) with the first
442    * <TT>length</TT> characters in <TT>srcChars</TT>
443    * @param start the offset at which the compare operation begins
444    * @param length the number of characters to compare.
445    * @param srcChars the characters to be compared
446    * @return The result of bitwise character comparison: 0 if this
447    * contains the same characters as <code>srcChars</code>, -1 if the characters in
448    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
449    * characters in this are bitwise greater than the characters
450    * in <code>srcChars</code>.
451    * @stable ICU 2.0
452    */
453   inline int8_t compare(int32_t start,
454          int32_t length,
455          const char16_t *srcChars) const;
456 
457   /**
458    * Compare the characters bitwise in the range
459    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
460    * in <TT>srcChars</TT> in the range
461    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
462    * @param start the offset at which the compare operation begins
463    * @param length the number of characters in this to compare
464    * @param srcChars the characters to be compared
465    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
466    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
467    * @return The result of bitwise character comparison: 0 if this
468    * contains the same characters as <code>srcChars</code>, -1 if the characters in
469    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
470    * characters in this are bitwise greater than the characters
471    * in <code>srcChars</code>.
472    * @stable ICU 2.0
473    */
474   inline int8_t compare(int32_t start,
475          int32_t length,
476          const char16_t *srcChars,
477          int32_t srcStart,
478          int32_t srcLength) const;
479 
480   /**
481    * Compare the characters bitwise in the range
482    * [<TT>start</TT>, <TT>limit</TT>) with the characters
483    * in <TT>srcText</TT> in the range
484    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
485    * @param start the offset at which the compare operation begins
486    * @param limit the offset immediately following the compare operation
487    * @param srcText the text to be compared
488    * @param srcStart the offset into <TT>srcText</TT> to start comparison
489    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
490    * @return The result of bitwise character comparison: 0 if this
491    * contains the same characters as <code>srcText</code>, -1 if the characters in
492    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
493    * characters in this are bitwise greater than the characters
494    * in <code>srcText</code>.
495    * @stable ICU 2.0
496    */
497   inline int8_t compareBetween(int32_t start,
498             int32_t limit,
499             const UnicodeString& srcText,
500             int32_t srcStart,
501             int32_t srcLimit) const;
502 
503   /**
504    * Compare two Unicode strings in code point order.
505    * The result may be different from the results of compare(), operator<, etc.
506    * if supplementary characters are present:
507    *
508    * In UTF-16, supplementary characters (with code points U+10000 and above) are
509    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
510    * which means that they compare as less than some other BMP characters like U+feff.
511    * This function compares Unicode strings in code point order.
512    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
513    *
514    * @param text Another string to compare this one to.
515    * @return a negative/zero/positive integer corresponding to whether
516    * this string is less than/equal to/greater than the second one
517    * in code point order
518    * @stable ICU 2.0
519    */
520   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
521 
522   /**
523    * Compare two Unicode strings in code point order.
524    * The result may be different from the results of compare(), operator<, etc.
525    * if supplementary characters are present:
526    *
527    * In UTF-16, supplementary characters (with code points U+10000 and above) are
528    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
529    * which means that they compare as less than some other BMP characters like U+feff.
530    * This function compares Unicode strings in code point order.
531    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
532    *
533    * @param start The start offset in this string at which the compare operation begins.
534    * @param length The number of code units from this string to compare.
535    * @param srcText Another string to compare this one to.
536    * @return a negative/zero/positive integer corresponding to whether
537    * this string is less than/equal to/greater than the second one
538    * in code point order
539    * @stable ICU 2.0
540    */
541   inline int8_t compareCodePointOrder(int32_t start,
542                                       int32_t length,
543                                       const UnicodeString& srcText) const;
544 
545   /**
546    * Compare two Unicode strings in code point order.
547    * The result may be different from the results of compare(), operator<, etc.
548    * if supplementary characters are present:
549    *
550    * In UTF-16, supplementary characters (with code points U+10000 and above) are
551    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
552    * which means that they compare as less than some other BMP characters like U+feff.
553    * This function compares Unicode strings in code point order.
554    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
555    *
556    * @param start The start offset in this string at which the compare operation begins.
557    * @param length The number of code units from this string to compare.
558    * @param srcText Another string to compare this one to.
559    * @param srcStart The start offset in that string at which the compare operation begins.
560    * @param srcLength The number of code units from that string to compare.
561    * @return a negative/zero/positive integer corresponding to whether
562    * this string is less than/equal to/greater than the second one
563    * in code point order
564    * @stable ICU 2.0
565    */
566    inline int8_t compareCodePointOrder(int32_t start,
567                                        int32_t length,
568                                        const UnicodeString& srcText,
569                                        int32_t srcStart,
570                                        int32_t srcLength) const;
571 
572   /**
573    * Compare two Unicode strings in code point order.
574    * The result may be different from the results of compare(), operator<, etc.
575    * if supplementary characters are present:
576    *
577    * In UTF-16, supplementary characters (with code points U+10000 and above) are
578    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
579    * which means that they compare as less than some other BMP characters like U+feff.
580    * This function compares Unicode strings in code point order.
581    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
582    *
583    * @param srcChars A pointer to another string to compare this one to.
584    * @param srcLength The number of code units from that string to compare.
585    * @return a negative/zero/positive integer corresponding to whether
586    * this string is less than/equal to/greater than the second one
587    * in code point order
588    * @stable ICU 2.0
589    */
590   inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
591                                       int32_t srcLength) const;
592 
593   /**
594    * Compare two Unicode strings in code point order.
595    * The result may be different from the results of compare(), operator<, etc.
596    * if supplementary characters are present:
597    *
598    * In UTF-16, supplementary characters (with code points U+10000 and above) are
599    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
600    * which means that they compare as less than some other BMP characters like U+feff.
601    * This function compares Unicode strings in code point order.
602    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
603    *
604    * @param start The start offset in this string at which the compare operation begins.
605    * @param length The number of code units from this string to compare.
606    * @param srcChars A pointer to another string to compare this one to.
607    * @return a negative/zero/positive integer corresponding to whether
608    * this string is less than/equal to/greater than the second one
609    * in code point order
610    * @stable ICU 2.0
611    */
612   inline int8_t compareCodePointOrder(int32_t start,
613                                       int32_t length,
614                                       const char16_t *srcChars) const;
615 
616   /**
617    * Compare two Unicode strings in code point order.
618    * The result may be different from the results of compare(), operator<, etc.
619    * if supplementary characters are present:
620    *
621    * In UTF-16, supplementary characters (with code points U+10000 and above) are
622    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
623    * which means that they compare as less than some other BMP characters like U+feff.
624    * This function compares Unicode strings in code point order.
625    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
626    *
627    * @param start The start offset in this string at which the compare operation begins.
628    * @param length The number of code units from this string to compare.
629    * @param srcChars A pointer to another string to compare this one to.
630    * @param srcStart The start offset in that string at which the compare operation begins.
631    * @param srcLength The number of code units from that string to compare.
632    * @return a negative/zero/positive integer corresponding to whether
633    * this string is less than/equal to/greater than the second one
634    * in code point order
635    * @stable ICU 2.0
636    */
637   inline int8_t compareCodePointOrder(int32_t start,
638                                       int32_t length,
639                                       const char16_t *srcChars,
640                                       int32_t srcStart,
641                                       int32_t srcLength) const;
642 
643   /**
644    * Compare two Unicode strings in code point order.
645    * The result may be different from the results of compare(), operator<, etc.
646    * if supplementary characters are present:
647    *
648    * In UTF-16, supplementary characters (with code points U+10000 and above) are
649    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
650    * which means that they compare as less than some other BMP characters like U+feff.
651    * This function compares Unicode strings in code point order.
652    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
653    *
654    * @param start The start offset in this string at which the compare operation begins.
655    * @param limit The offset after the last code unit from this string to compare.
656    * @param srcText Another string to compare this one to.
657    * @param srcStart The start offset in that string at which the compare operation begins.
658    * @param srcLimit The offset after the last code unit from that string to compare.
659    * @return a negative/zero/positive integer corresponding to whether
660    * this string is less than/equal to/greater than the second one
661    * in code point order
662    * @stable ICU 2.0
663    */
664   inline int8_t compareCodePointOrderBetween(int32_t start,
665                                              int32_t limit,
666                                              const UnicodeString& srcText,
667                                              int32_t srcStart,
668                                              int32_t srcLimit) const;
669 
670   /**
671    * Compare two strings case-insensitively using full case folding.
672    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
673    *
674    * @param text Another string to compare this one to.
675    * @param options A bit set of options:
676    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
677    *     Comparison in code unit order with default case folding.
678    *
679    *   - U_COMPARE_CODE_POINT_ORDER
680    *     Set to choose code point order instead of code unit order
681    *     (see u_strCompare for details).
682    *
683    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
684    *
685    * @return A negative, zero, or positive integer indicating the comparison result.
686    * @stable ICU 2.0
687    */
688   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
689 
690   /**
691    * Compare two strings case-insensitively using full case folding.
692    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
693    *
694    * @param start The start offset in this string at which the compare operation begins.
695    * @param length The number of code units from this string to compare.
696    * @param srcText Another string to compare this one to.
697    * @param options A bit set of options:
698    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
699    *     Comparison in code unit order with default case folding.
700    *
701    *   - U_COMPARE_CODE_POINT_ORDER
702    *     Set to choose code point order instead of code unit order
703    *     (see u_strCompare for details).
704    *
705    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
706    *
707    * @return A negative, zero, or positive integer indicating the comparison result.
708    * @stable ICU 2.0
709    */
710   inline int8_t caseCompare(int32_t start,
711          int32_t length,
712          const UnicodeString& srcText,
713          uint32_t options) const;
714 
715   /**
716    * Compare two strings case-insensitively using full case folding.
717    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
718    *
719    * @param start The start offset in this string at which the compare operation begins.
720    * @param length The number of code units from this string to compare.
721    * @param srcText Another string to compare this one to.
722    * @param srcStart The start offset in that string at which the compare operation begins.
723    * @param srcLength The number of code units from that string to compare.
724    * @param options A bit set of options:
725    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
726    *     Comparison in code unit order with default case folding.
727    *
728    *   - U_COMPARE_CODE_POINT_ORDER
729    *     Set to choose code point order instead of code unit order
730    *     (see u_strCompare for details).
731    *
732    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
733    *
734    * @return A negative, zero, or positive integer indicating the comparison result.
735    * @stable ICU 2.0
736    */
737   inline int8_t caseCompare(int32_t start,
738          int32_t length,
739          const UnicodeString& srcText,
740          int32_t srcStart,
741          int32_t srcLength,
742          uint32_t options) const;
743 
744   /**
745    * Compare two strings case-insensitively using full case folding.
746    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
747    *
748    * @param srcChars A pointer to another string to compare this one to.
749    * @param srcLength The number of code units from that string to compare.
750    * @param options A bit set of options:
751    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
752    *     Comparison in code unit order with default case folding.
753    *
754    *   - U_COMPARE_CODE_POINT_ORDER
755    *     Set to choose code point order instead of code unit order
756    *     (see u_strCompare for details).
757    *
758    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
759    *
760    * @return A negative, zero, or positive integer indicating the comparison result.
761    * @stable ICU 2.0
762    */
763   inline int8_t caseCompare(ConstChar16Ptr srcChars,
764          int32_t srcLength,
765          uint32_t options) const;
766 
767   /**
768    * Compare two strings case-insensitively using full case folding.
769    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
770    *
771    * @param start The start offset in this string at which the compare operation begins.
772    * @param length The number of code units from this string to compare.
773    * @param srcChars A pointer to another string to compare this one to.
774    * @param options A bit set of options:
775    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
776    *     Comparison in code unit order with default case folding.
777    *
778    *   - U_COMPARE_CODE_POINT_ORDER
779    *     Set to choose code point order instead of code unit order
780    *     (see u_strCompare for details).
781    *
782    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
783    *
784    * @return A negative, zero, or positive integer indicating the comparison result.
785    * @stable ICU 2.0
786    */
787   inline int8_t caseCompare(int32_t start,
788          int32_t length,
789          const char16_t *srcChars,
790          uint32_t options) const;
791 
792   /**
793    * Compare two strings case-insensitively using full case folding.
794    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
795    *
796    * @param start The start offset in this string at which the compare operation begins.
797    * @param length The number of code units from this string to compare.
798    * @param srcChars A pointer to another string to compare this one to.
799    * @param srcStart The start offset in that string at which the compare operation begins.
800    * @param srcLength The number of code units from that string to compare.
801    * @param options A bit set of options:
802    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
803    *     Comparison in code unit order with default case folding.
804    *
805    *   - U_COMPARE_CODE_POINT_ORDER
806    *     Set to choose code point order instead of code unit order
807    *     (see u_strCompare for details).
808    *
809    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
810    *
811    * @return A negative, zero, or positive integer indicating the comparison result.
812    * @stable ICU 2.0
813    */
814   inline int8_t caseCompare(int32_t start,
815          int32_t length,
816          const char16_t *srcChars,
817          int32_t srcStart,
818          int32_t srcLength,
819          uint32_t options) const;
820 
821   /**
822    * Compare two strings case-insensitively using full case folding.
823    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
824    *
825    * @param start The start offset in this string at which the compare operation begins.
826    * @param limit The offset after the last code unit from this string to compare.
827    * @param srcText Another string to compare this one to.
828    * @param srcStart The start offset in that string at which the compare operation begins.
829    * @param srcLimit The offset after the last code unit from that string to compare.
830    * @param options A bit set of options:
831    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
832    *     Comparison in code unit order with default case folding.
833    *
834    *   - U_COMPARE_CODE_POINT_ORDER
835    *     Set to choose code point order instead of code unit order
836    *     (see u_strCompare for details).
837    *
838    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
839    *
840    * @return A negative, zero, or positive integer indicating the comparison result.
841    * @stable ICU 2.0
842    */
843   inline int8_t caseCompareBetween(int32_t start,
844             int32_t limit,
845             const UnicodeString& srcText,
846             int32_t srcStart,
847             int32_t srcLimit,
848             uint32_t options) const;
849 
850   /**
851    * Determine if this starts with the characters in <TT>text</TT>
852    * @param text The text to match.
853    * @return TRUE if this starts with the characters in <TT>text</TT>,
854    * FALSE otherwise
855    * @stable ICU 2.0
856    */
857   inline UBool startsWith(const UnicodeString& text) const;
858 
859   /**
860    * Determine if this starts with the characters in <TT>srcText</TT>
861    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
862    * @param srcText The text to match.
863    * @param srcStart the offset into <TT>srcText</TT> to start matching
864    * @param srcLength the number of characters in <TT>srcText</TT> to match
865    * @return TRUE if this starts with the characters in <TT>srcText</TT>,
866    * FALSE otherwise
867    * @stable ICU 2.0
868    */
869   inline UBool startsWith(const UnicodeString& srcText,
870             int32_t srcStart,
871             int32_t srcLength) const;
872 
873   /**
874    * Determine if this starts with the characters in <TT>srcChars</TT>
875    * @param srcChars The characters to match.
876    * @param srcLength the number of characters in <TT>srcChars</TT>
877    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
878    * FALSE otherwise
879    * @stable ICU 2.0
880    */
881   inline UBool startsWith(ConstChar16Ptr srcChars,
882             int32_t srcLength) const;
883 
884   /**
885    * Determine if this starts with the characters in <TT>srcChars</TT>
886    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
887    * @param srcChars The characters to match.
888    * @param srcStart the offset into <TT>srcChars</TT> to start matching
889    * @param srcLength the number of characters in <TT>srcChars</TT> to match
890    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
891    * @stable ICU 2.0
892    */
893   inline UBool startsWith(const char16_t *srcChars,
894             int32_t srcStart,
895             int32_t srcLength) const;
896 
897   /**
898    * Determine if this ends with the characters in <TT>text</TT>
899    * @param text The text to match.
900    * @return TRUE if this ends with the characters in <TT>text</TT>,
901    * FALSE otherwise
902    * @stable ICU 2.0
903    */
904   inline UBool endsWith(const UnicodeString& text) const;
905 
906   /**
907    * Determine if this ends with the characters in <TT>srcText</TT>
908    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
909    * @param srcText The text to match.
910    * @param srcStart the offset into <TT>srcText</TT> to start matching
911    * @param srcLength the number of characters in <TT>srcText</TT> to match
912    * @return TRUE if this ends with the characters in <TT>srcText</TT>,
913    * FALSE otherwise
914    * @stable ICU 2.0
915    */
916   inline UBool endsWith(const UnicodeString& srcText,
917           int32_t srcStart,
918           int32_t srcLength) const;
919 
920   /**
921    * Determine if this ends with the characters in <TT>srcChars</TT>
922    * @param srcChars The characters to match.
923    * @param srcLength the number of characters in <TT>srcChars</TT>
924    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
925    * FALSE otherwise
926    * @stable ICU 2.0
927    */
928   inline UBool endsWith(ConstChar16Ptr srcChars,
929           int32_t srcLength) const;
930 
931   /**
932    * Determine if this ends with the characters in <TT>srcChars</TT>
933    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
934    * @param srcChars The characters to match.
935    * @param srcStart the offset into <TT>srcChars</TT> to start matching
936    * @param srcLength the number of characters in <TT>srcChars</TT> to match
937    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
938    * FALSE otherwise
939    * @stable ICU 2.0
940    */
941   inline UBool endsWith(const char16_t *srcChars,
942           int32_t srcStart,
943           int32_t srcLength) const;
944 
945 
946   /* Searching - bitwise only */
947 
948   /**
949    * Locate in this the first occurrence of the characters in <TT>text</TT>,
950    * using bitwise comparison.
951    * @param text The text to search for.
952    * @return The offset into this of the start of <TT>text</TT>,
953    * or -1 if not found.
954    * @stable ICU 2.0
955    */
956   inline int32_t indexOf(const UnicodeString& text) const;
957 
958   /**
959    * Locate in this the first occurrence of the characters in <TT>text</TT>
960    * starting at offset <TT>start</TT>, using bitwise comparison.
961    * @param text The text to search for.
962    * @param start The offset at which searching will start.
963    * @return The offset into this of the start of <TT>text</TT>,
964    * or -1 if not found.
965    * @stable ICU 2.0
966    */
967   inline int32_t indexOf(const UnicodeString& text,
968               int32_t start) const;
969 
970   /**
971    * Locate in this the first occurrence in the range
972    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
973    * in <TT>text</TT>, using bitwise comparison.
974    * @param text The text to search for.
975    * @param start The offset at which searching will start.
976    * @param length The number of characters to search
977    * @return The offset into this of the start of <TT>text</TT>,
978    * or -1 if not found.
979    * @stable ICU 2.0
980    */
981   inline int32_t indexOf(const UnicodeString& text,
982               int32_t start,
983               int32_t length) const;
984 
985   /**
986    * Locate in this the first occurrence in the range
987    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
988    *  in <TT>srcText</TT> in the range
989    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
990    * using bitwise comparison.
991    * @param srcText The text to search for.
992    * @param srcStart the offset into <TT>srcText</TT> at which
993    * to start matching
994    * @param srcLength the number of characters in <TT>srcText</TT> to match
995    * @param start the offset into this at which to start matching
996    * @param length the number of characters in this to search
997    * @return The offset into this of the start of <TT>srcText</TT>,
998    * or -1 if not found.
999    * @stable ICU 2.0
1000    */
1001   inline int32_t indexOf(const UnicodeString& srcText,
1002               int32_t srcStart,
1003               int32_t srcLength,
1004               int32_t start,
1005               int32_t length) const;
1006 
1007   /**
1008    * Locate in this the first occurrence of the characters in
1009    * <TT>srcChars</TT>
1010    * starting at offset <TT>start</TT>, using bitwise comparison.
1011    * @param srcChars The text to search for.
1012    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1013    * @param start the offset into this at which to start matching
1014    * @return The offset into this of the start of <TT>srcChars</TT>,
1015    * or -1 if not found.
1016    * @stable ICU 2.0
1017    */
1018   inline int32_t indexOf(const char16_t *srcChars,
1019               int32_t srcLength,
1020               int32_t start) const;
1021 
1022   /**
1023    * Locate in this the first occurrence in the range
1024    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1025    * in <TT>srcChars</TT>, using bitwise comparison.
1026    * @param srcChars The text to search for.
1027    * @param srcLength the number of characters in <TT>srcChars</TT>
1028    * @param start The offset at which searching will start.
1029    * @param length The number of characters to search
1030    * @return The offset into this of the start of <TT>srcChars</TT>,
1031    * or -1 if not found.
1032    * @stable ICU 2.0
1033    */
1034   inline int32_t indexOf(ConstChar16Ptr srcChars,
1035               int32_t srcLength,
1036               int32_t start,
1037               int32_t length) const;
1038 
1039   /**
1040    * Locate in this the first occurrence in the range
1041    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1042    * in <TT>srcChars</TT> in the range
1043    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1044    * using bitwise comparison.
1045    * @param srcChars The text to search for.
1046    * @param srcStart the offset into <TT>srcChars</TT> at which
1047    * to start matching
1048    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1049    * @param start the offset into this at which to start matching
1050    * @param length the number of characters in this to search
1051    * @return The offset into this of the start of <TT>srcChars</TT>,
1052    * or -1 if not found.
1053    * @stable ICU 2.0
1054    */
1055   int32_t indexOf(const char16_t *srcChars,
1056               int32_t srcStart,
1057               int32_t srcLength,
1058               int32_t start,
1059               int32_t length) const;
1060 
1061   /**
1062    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1063    * using bitwise comparison.
1064    * @param c The code unit to search for.
1065    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1066    * @stable ICU 2.0
1067    */
1068   inline int32_t indexOf(char16_t c) const;
1069 
1070   /**
1071    * Locate in this the first occurrence of the code point <TT>c</TT>,
1072    * using bitwise comparison.
1073    *
1074    * @param c The code point to search for.
1075    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1076    * @stable ICU 2.0
1077    */
1078   inline int32_t indexOf(UChar32 c) const;
1079 
1080   /**
1081    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1082    * starting at offset <TT>start</TT>, using bitwise comparison.
1083    * @param c The code unit to search for.
1084    * @param start The offset at which searching will start.
1085    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1086    * @stable ICU 2.0
1087    */
1088   inline int32_t indexOf(char16_t c,
1089               int32_t start) const;
1090 
1091   /**
1092    * Locate in this the first occurrence of the code point <TT>c</TT>
1093    * starting at offset <TT>start</TT>, using bitwise comparison.
1094    *
1095    * @param c The code point to search for.
1096    * @param start The offset at which searching will start.
1097    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1098    * @stable ICU 2.0
1099    */
1100   inline int32_t indexOf(UChar32 c,
1101               int32_t start) const;
1102 
1103   /**
1104    * Locate in this the first occurrence of the BMP code point <code>c</code>
1105    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1106    * using bitwise comparison.
1107    * @param c The code unit to search for.
1108    * @param start the offset into this at which to start matching
1109    * @param length the number of characters in this to search
1110    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1111    * @stable ICU 2.0
1112    */
1113   inline int32_t indexOf(char16_t c,
1114               int32_t start,
1115               int32_t length) const;
1116 
1117   /**
1118    * Locate in this the first occurrence of the code point <TT>c</TT>
1119    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1120    * using bitwise comparison.
1121    *
1122    * @param c The code point to search for.
1123    * @param start the offset into this at which to start matching
1124    * @param length the number of characters in this to search
1125    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1126    * @stable ICU 2.0
1127    */
1128   inline int32_t indexOf(UChar32 c,
1129               int32_t start,
1130               int32_t length) const;
1131 
1132   /**
1133    * Locate in this the last occurrence of the characters in <TT>text</TT>,
1134    * using bitwise comparison.
1135    * @param text The text to search for.
1136    * @return The offset into this of the start of <TT>text</TT>,
1137    * or -1 if not found.
1138    * @stable ICU 2.0
1139    */
1140   inline int32_t lastIndexOf(const UnicodeString& text) const;
1141 
1142   /**
1143    * Locate in this the last occurrence of the characters in <TT>text</TT>
1144    * starting at offset <TT>start</TT>, using bitwise comparison.
1145    * @param text The text to search for.
1146    * @param start The offset at which searching will start.
1147    * @return The offset into this of the start of <TT>text</TT>,
1148    * or -1 if not found.
1149    * @stable ICU 2.0
1150    */
1151   inline int32_t lastIndexOf(const UnicodeString& text,
1152               int32_t start) const;
1153 
1154   /**
1155    * Locate in this the last occurrence in the range
1156    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1157    * in <TT>text</TT>, using bitwise comparison.
1158    * @param text The text to search for.
1159    * @param start The offset at which searching will start.
1160    * @param length The number of characters to search
1161    * @return The offset into this of the start of <TT>text</TT>,
1162    * or -1 if not found.
1163    * @stable ICU 2.0
1164    */
1165   inline int32_t lastIndexOf(const UnicodeString& text,
1166               int32_t start,
1167               int32_t length) const;
1168 
1169   /**
1170    * Locate in this the last occurrence in the range
1171    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1172    * in <TT>srcText</TT> in the range
1173    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1174    * using bitwise comparison.
1175    * @param srcText The text to search for.
1176    * @param srcStart the offset into <TT>srcText</TT> at which
1177    * to start matching
1178    * @param srcLength the number of characters in <TT>srcText</TT> to match
1179    * @param start the offset into this at which to start matching
1180    * @param length the number of characters in this to search
1181    * @return The offset into this of the start of <TT>srcText</TT>,
1182    * or -1 if not found.
1183    * @stable ICU 2.0
1184    */
1185   inline int32_t lastIndexOf(const UnicodeString& srcText,
1186               int32_t srcStart,
1187               int32_t srcLength,
1188               int32_t start,
1189               int32_t length) const;
1190 
1191   /**
1192    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1193    * starting at offset <TT>start</TT>, using bitwise comparison.
1194    * @param srcChars The text to search for.
1195    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1196    * @param start the offset into this at which to start matching
1197    * @return The offset into this of the start of <TT>srcChars</TT>,
1198    * or -1 if not found.
1199    * @stable ICU 2.0
1200    */
1201   inline int32_t lastIndexOf(const char16_t *srcChars,
1202               int32_t srcLength,
1203               int32_t start) const;
1204 
1205   /**
1206    * Locate in this the last occurrence in the range
1207    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1208    * in <TT>srcChars</TT>, using bitwise comparison.
1209    * @param srcChars The text to search for.
1210    * @param srcLength the number of characters in <TT>srcChars</TT>
1211    * @param start The offset at which searching will start.
1212    * @param length The number of characters to search
1213    * @return The offset into this of the start of <TT>srcChars</TT>,
1214    * or -1 if not found.
1215    * @stable ICU 2.0
1216    */
1217   inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1218               int32_t srcLength,
1219               int32_t start,
1220               int32_t length) const;
1221 
1222   /**
1223    * Locate in this the last occurrence in the range
1224    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1225    * in <TT>srcChars</TT> in the range
1226    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1227    * using bitwise comparison.
1228    * @param srcChars The text to search for.
1229    * @param srcStart the offset into <TT>srcChars</TT> at which
1230    * to start matching
1231    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1232    * @param start the offset into this at which to start matching
1233    * @param length the number of characters in this to search
1234    * @return The offset into this of the start of <TT>srcChars</TT>,
1235    * or -1 if not found.
1236    * @stable ICU 2.0
1237    */
1238   int32_t lastIndexOf(const char16_t *srcChars,
1239               int32_t srcStart,
1240               int32_t srcLength,
1241               int32_t start,
1242               int32_t length) const;
1243 
1244   /**
1245    * Locate in this the last occurrence of the BMP code point <code>c</code>,
1246    * using bitwise comparison.
1247    * @param c The code unit to search for.
1248    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1249    * @stable ICU 2.0
1250    */
1251   inline int32_t lastIndexOf(char16_t c) const;
1252 
1253   /**
1254    * Locate in this the last occurrence of the code point <TT>c</TT>,
1255    * using bitwise comparison.
1256    *
1257    * @param c The code point to search for.
1258    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1259    * @stable ICU 2.0
1260    */
1261   inline int32_t lastIndexOf(UChar32 c) const;
1262 
1263   /**
1264    * Locate in this the last occurrence of the BMP code point <code>c</code>
1265    * starting at offset <TT>start</TT>, using bitwise comparison.
1266    * @param c The code unit to search for.
1267    * @param start The offset at which searching will start.
1268    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1269    * @stable ICU 2.0
1270    */
1271   inline int32_t lastIndexOf(char16_t c,
1272               int32_t start) const;
1273 
1274   /**
1275    * Locate in this the last occurrence of the code point <TT>c</TT>
1276    * starting at offset <TT>start</TT>, using bitwise comparison.
1277    *
1278    * @param c The code point to search for.
1279    * @param start The offset at which searching will start.
1280    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1281    * @stable ICU 2.0
1282    */
1283   inline int32_t lastIndexOf(UChar32 c,
1284               int32_t start) const;
1285 
1286   /**
1287    * Locate in this the last occurrence of the BMP code point <code>c</code>
1288    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1289    * using bitwise comparison.
1290    * @param c The code unit to search for.
1291    * @param start the offset into this at which to start matching
1292    * @param length the number of characters in this to search
1293    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1294    * @stable ICU 2.0
1295    */
1296   inline int32_t lastIndexOf(char16_t c,
1297               int32_t start,
1298               int32_t length) const;
1299 
1300   /**
1301    * Locate in this the last occurrence of the code point <TT>c</TT>
1302    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1303    * using bitwise comparison.
1304    *
1305    * @param c The code point to search for.
1306    * @param start the offset into this at which to start matching
1307    * @param length the number of characters in this to search
1308    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1309    * @stable ICU 2.0
1310    */
1311   inline int32_t lastIndexOf(UChar32 c,
1312               int32_t start,
1313               int32_t length) const;
1314 
1315 
1316   /* Character access */
1317 
1318   /**
1319    * Return the code unit at offset <tt>offset</tt>.
1320    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1321    * @param offset a valid offset into the text
1322    * @return the code unit at offset <tt>offset</tt>
1323    *         or 0xffff if the offset is not valid for this string
1324    * @stable ICU 2.0
1325    */
1326   inline char16_t charAt(int32_t offset) const;
1327 
1328   /**
1329    * Return the code unit at offset <tt>offset</tt>.
1330    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1331    * @param offset a valid offset into the text
1332    * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1333    * @stable ICU 2.0
1334    */
1335   inline char16_t operator[] (int32_t offset) const;
1336 
1337   /**
1338    * Return the code point that contains the code unit
1339    * at offset <tt>offset</tt>.
1340    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1341    * @param offset a valid offset into the text
1342    * that indicates the text offset of any of the code units
1343    * that will be assembled into a code point (21-bit value) and returned
1344    * @return the code point of text at <tt>offset</tt>
1345    *         or 0xffff if the offset is not valid for this string
1346    * @stable ICU 2.0
1347    */
1348   UChar32 char32At(int32_t offset) const;
1349 
1350   /**
1351    * Adjust a random-access offset so that
1352    * it points to the beginning of a Unicode character.
1353    * The offset that is passed in points to
1354    * any code unit of a code point,
1355    * while the returned offset will point to the first code unit
1356    * of the same code point.
1357    * In UTF-16, if the input offset points to a second surrogate
1358    * of a surrogate pair, then the returned offset will point
1359    * to the first surrogate.
1360    * @param offset a valid offset into one code point of the text
1361    * @return offset of the first code unit of the same code point
1362    * @see U16_SET_CP_START
1363    * @stable ICU 2.0
1364    */
1365   int32_t getChar32Start(int32_t offset) const;
1366 
1367   /**
1368    * Adjust a random-access offset so that
1369    * it points behind a Unicode character.
1370    * The offset that is passed in points behind
1371    * any code unit of a code point,
1372    * while the returned offset will point behind the last code unit
1373    * of the same code point.
1374    * In UTF-16, if the input offset points behind the first surrogate
1375    * (i.e., to the second surrogate)
1376    * of a surrogate pair, then the returned offset will point
1377    * behind the second surrogate (i.e., to the first code unit after the pair).
1378    * @param offset a valid offset after any code unit of a code point of the text
1379    * @return offset of the first code unit after the same code point
1380    * @see U16_SET_CP_LIMIT
1381    * @stable ICU 2.0
1382    */
1383   int32_t getChar32Limit(int32_t offset) const;
1384 
1385   /**
1386    * Move the code unit index along the string by delta code points.
1387    * Interpret the input index as a code unit-based offset into the string,
1388    * move the index forward or backward by delta code points, and
1389    * return the resulting index.
1390    * The input index should point to the first code unit of a code point,
1391    * if there is more than one.
1392    *
1393    * Both input and output indexes are code unit-based as for all
1394    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1395    * If delta<0 then the index is moved backward (toward the start of the string).
1396    * If delta>0 then the index is moved forward (toward the end of the string).
1397    *
1398    * This behaves like CharacterIterator::move32(delta, kCurrent).
1399    *
1400    * Behavior for out-of-bounds indexes:
1401    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1402    * if the input index<0 then it is pinned to 0;
1403    * if the input index>length() then it is pinned to length().
1404    * Afterwards, the index is moved by <code>delta</code> code points
1405    * forward or backward,
1406    * but no further backward than to 0 and no further forward than to length().
1407    * The resulting index return value will be in between 0 and length(), inclusively.
1408    *
1409    * Examples:
1410    * <pre>
1411    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1412    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape();
1413    *
1414    * // initial index: position of U+10000
1415    * int32_t index=1;
1416    *
1417    * // the following examples will all result in index==4, position of U+10ffff
1418    *
1419    * // skip 2 code points from some position in the string
1420    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1421    *
1422    * // go to the 3rd code point from the start of s (0-based)
1423    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1424    *
1425    * // go to the next-to-last code point of s
1426    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1427    * </pre>
1428    *
1429    * @param index input code unit index
1430    * @param delta (signed) code point count to move the index forward or backward
1431    *        in the string
1432    * @return the resulting code unit index
1433    * @stable ICU 2.0
1434    */
1435   int32_t moveIndex32(int32_t index, int32_t delta) const;
1436 
1437   /* Substring extraction */
1438 
1439   /**
1440    * Copy the characters in the range
1441    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1442    * beginning at <tt>dstStart</tt>.
1443    * If the string aliases to <code>dst</code> itself as an external buffer,
1444    * then extract() will not copy the contents.
1445    *
1446    * @param start offset of first character which will be copied into the array
1447    * @param length the number of characters to extract
1448    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1449    * must be at least (<tt>dstStart + length</tt>).
1450    * @param dstStart the offset in <TT>dst</TT> where the first character
1451    * will be extracted
1452    * @stable ICU 2.0
1453    */
1454   inline void extract(int32_t start,
1455            int32_t length,
1456            Char16Ptr dst,
1457            int32_t dstStart = 0) const;
1458 
1459   /**
1460    * Copy the contents of the string into dest.
1461    * This is a convenience function that
1462    * checks if there is enough space in dest,
1463    * extracts the entire string if possible,
1464    * and NUL-terminates dest if possible.
1465    *
1466    * If the string fits into dest but cannot be NUL-terminated
1467    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1468    * If the string itself does not fit into dest
1469    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1470    *
1471    * If the string aliases to <code>dest</code> itself as an external buffer,
1472    * then extract() will not copy the contents.
1473    *
1474    * @param dest Destination string buffer.
1475    * @param destCapacity Number of char16_ts available at dest.
1476    * @param errorCode ICU error code.
1477    * @return length()
1478    * @stable ICU 2.0
1479    */
1480   int32_t
1481   extract(Char16Ptr dest, int32_t destCapacity,
1482           UErrorCode &errorCode) const;
1483 
1484   /**
1485    * Copy the characters in the range
1486    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1487    * <tt>target</tt>.
1488    * @param start offset of first character which will be copied
1489    * @param length the number of characters to extract
1490    * @param target UnicodeString into which to copy characters.
1491    * The function returns nothing; the extracted characters are copied into <TT>target</TT>.
1492    * @stable ICU 2.0
1493    */
1494   inline void extract(int32_t start,
1495            int32_t length,
1496            UnicodeString& target) const;
1497 
1498   /**
1499    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1500    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1501    * @param start offset of first character which will be copied into the array
1502    * @param limit offset immediately following the last character to be copied
1503    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1504    * must be at least (<tt>dstStart + (limit - start)</tt>).
1505    * @param dstStart the offset in <TT>dst</TT> where the first character
1506    * will be extracted
1507    * @stable ICU 2.0
1508    */
1509   inline void extractBetween(int32_t start,
1510               int32_t limit,
1511               char16_t *dst,
1512               int32_t dstStart = 0) const;
1513 
1514   /**
1515    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1516    * into the UnicodeString <tt>target</tt>.  Replaceable API.
1517    * @param start offset of first character which will be copied
1518    * @param limit offset immediately following the last character to be copied
1519    * @param target UnicodeString into which to copy characters.
1520    * The function returns nothing; the extracted characters are copied into <TT>target</TT>.
1521    * @stable ICU 2.0
1522    */
1523   virtual void extractBetween(int32_t start,
1524               int32_t limit,
1525               UnicodeString& target) const;
1526 
1527   /**
1528    * Copy the characters in the range
1529    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
1530    * All characters must be invariant (see utypes.h).
1531    * Use US_INV as the last, signature-distinguishing parameter.
1532    *
1533    * This function does not write any more than <code>targetCapacity</code>
1534    * characters but returns the length of the entire output string
1535    * so that one can allocate a larger buffer and call the function again
1536    * if necessary.
1537    * The output string is NUL-terminated if possible.
1538    *
1539    * @param start offset of first character which will be copied
1540    * @param startLength the number of characters to extract
1541    * @param target the target buffer for extraction, can be NULL
1542    *               if targetCapacity is 0
1543    * @param targetCapacity the length of the target buffer
1544    * @param inv Signature-distinguishing parameter, use US_INV.
1545    * @return the output string length, not including the terminating NUL
1546    * @stable ICU 3.2
1547    */
1548   int32_t extract(int32_t start,
1549            int32_t startLength,
1550            char *target,
1551            int32_t targetCapacity,
1552            enum EInvariant inv) const;
1553 
1554 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1555 
1556   /**
1557    * Copy the characters in the range
1558    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1559    * in the platform's default codepage.
1560    * This function does not write any more than <code>targetLength</code>
1561    * characters but returns the length of the entire output string
1562    * so that one can allocate a larger buffer and call the function again
1563    * if necessary.
1564    * The output string is NUL-terminated if possible.
1565    *
1566    * @param start offset of first character which will be copied
1567    * @param startLength the number of characters to extract
1568    * @param target the target buffer for extraction
1569    * @param targetLength the length of the target buffer
1570    * If <TT>target</TT> is NULL, then the number of bytes required for
1571    * <TT>target</TT> is returned.
1572    * @return the output string length, not including the terminating NUL
1573    * @stable ICU 2.0
1574    */
1575   int32_t extract(int32_t start,
1576            int32_t startLength,
1577            char *target,
1578            uint32_t targetLength) const;
1579 
1580 #endif
1581 
1582 #if !UCONFIG_NO_CONVERSION
1583 
1584   /**
1585    * Copy the characters in the range
1586    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1587    * in a specified codepage.
1588    * The output string is NUL-terminated.
1589    *
1590    * Recommendation: For invariant-character strings use
1591    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1592    * because it avoids object code dependencies of UnicodeString on
1593    * the conversion code.
1594    *
1595    * @param start offset of first character which will be copied
1596    * @param startLength the number of characters to extract
1597    * @param target the target buffer for extraction
1598    * @param codepage the desired codepage for the characters.  0 has
1599    * the special meaning of the default codepage
1600    * If <code>codepage</code> is an empty string (<code>""</code>),
1601    * then a simple conversion is performed on the codepage-invariant
1602    * subset ("invariant characters") of the platform encoding. See utypes.h.
1603    * If <TT>target</TT> is NULL, then the number of bytes required for
1604    * <TT>target</TT> is returned. It is assumed that the target is big enough
1605    * to fit all of the characters.
1606    * @return the output string length, not including the terminating NUL
1607    * @stable ICU 2.0
1608    */
1609   inline int32_t extract(int32_t start,
1610                  int32_t startLength,
1611                  char *target,
1612                  const char *codepage = 0) const;
1613 
1614   /**
1615    * Copy the characters in the range
1616    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1617    * in a specified codepage.
1618    * This function does not write any more than <code>targetLength</code>
1619    * characters but returns the length of the entire output string
1620    * so that one can allocate a larger buffer and call the function again
1621    * if necessary.
1622    * The output string is NUL-terminated if possible.
1623    *
1624    * Recommendation: For invariant-character strings use
1625    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1626    * because it avoids object code dependencies of UnicodeString on
1627    * the conversion code.
1628    *
1629    * @param start offset of first character which will be copied
1630    * @param startLength the number of characters to extract
1631    * @param target the target buffer for extraction
1632    * @param targetLength the length of the target buffer
1633    * @param codepage the desired codepage for the characters.  0 has
1634    * the special meaning of the default codepage
1635    * If <code>codepage</code> is an empty string (<code>""</code>),
1636    * then a simple conversion is performed on the codepage-invariant
1637    * subset ("invariant characters") of the platform encoding. See utypes.h.
1638    * If <TT>target</TT> is NULL, then the number of bytes required for
1639    * <TT>target</TT> is returned.
1640    * @return the output string length, not including the terminating NUL
1641    * @stable ICU 2.0
1642    */
1643   int32_t extract(int32_t start,
1644            int32_t startLength,
1645            char *target,
1646            uint32_t targetLength,
1647            const char *codepage) const;
1648 
1649   /**
1650    * Convert the UnicodeString into a codepage string using an existing UConverter.
1651    * The output string is NUL-terminated if possible.
1652    *
1653    * This function avoids the overhead of opening and closing a converter if
1654    * multiple strings are extracted.
1655    *
1656    * @param dest destination string buffer, can be NULL if destCapacity==0
1657    * @param destCapacity the number of chars available at dest
1658    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1659    *        or NULL for the default converter
1660    * @param errorCode normal ICU error code
1661    * @return the length of the output string, not counting the terminating NUL;
1662    *         if the length is greater than destCapacity, then the string will not fit
1663    *         and a buffer of the indicated length would need to be passed in
1664    * @stable ICU 2.0
1665    */
1666   int32_t extract(char *dest, int32_t destCapacity,
1667                   UConverter *cnv,
1668                   UErrorCode &errorCode) const;
1669 
1670 #endif
1671 
1672   /**
1673    * Create a temporary substring for the specified range.
1674    * Unlike the substring constructor and setTo() functions,
1675    * the object returned here will be a read-only alias (using getBuffer())
1676    * rather than copying the text.
1677    * As a result, this substring operation is much faster but requires
1678    * that the original string not be modified or deleted during the lifetime
1679    * of the returned substring object.
1680    * @param start offset of the first character visible in the substring
1681    * @param length length of the substring
1682    * @return a read-only alias UnicodeString object for the substring
1683    * @stable ICU 4.4
1684    */
1685   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1686 
1687   /**
1688    * Create a temporary substring for the specified range.
1689    * Same as tempSubString(start, length) except that the substring range
1690    * is specified as a (start, limit) pair (with an exclusive limit index)
1691    * rather than a (start, length) pair.
1692    * @param start offset of the first character visible in the substring
1693    * @param limit offset immediately following the last character visible in the substring
1694    * @return a read-only alias UnicodeString object for the substring
1695    * @stable ICU 4.4
1696    */
1697   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1698 
1699   /**
1700    * Convert the UnicodeString to UTF-8 and write the result
1701    * to a ByteSink. This is called by toUTF8String().
1702    * Unpaired surrogates are replaced with U+FFFD.
1703    * Calls u_strToUTF8WithSub().
1704    *
1705    * @param sink A ByteSink to which the UTF-8 version of the string is written.
1706    *             sink.Flush() is called at the end.
1707    * @stable ICU 4.2
1708    * @see toUTF8String
1709    */
1710   void toUTF8(ByteSink &sink) const;
1711 
1712   /**
1713    * Convert the UnicodeString to UTF-8 and append the result
1714    * to a standard string.
1715    * Unpaired surrogates are replaced with U+FFFD.
1716    * Calls toUTF8() with a StringByteSink that wraps <code>result</code>.
1717    *
1718    * @param result A standard string (or a compatible object)
1719    *        to which the UTF-8 version of the string is appended.
1720    * @return A reference to <code>result</code>, i.e. the same object that was passed in.
1721    * @stable ICU 4.2
1722    * @see toUTF8
1723    */
1724   template<typename StringClass>
toUTF8String(StringClass & result)1725   StringClass &toUTF8String(StringClass &result) const {
1726     StringByteSink<StringClass> sbs(&result, length());  // NOTE(review): length() (UTF-16 code units) is presumably a capacity hint for the sink -- confirm against StringByteSink
1727     toUTF8(sbs);  // emits the UTF-8 form of this string through the sink
1728     return result;
1729   }
1730 
1731   /**
1732    * Convert the UnicodeString to UTF-32.
1733    * Unpaired surrogates are replaced with U+FFFD.
1734    * Calls u_strToUTF32WithSub().
1735    *
1736    * @param utf32 destination string buffer, can be NULL if capacity==0
1737    * @param capacity the number of UChar32s available at utf32
1738    * @param errorCode Standard ICU error code. Its input value must
1739    *                  pass the U_SUCCESS() test, or else the function returns
1740    *                  immediately. Check for U_FAILURE() on output or use with
1741    *                  function chaining. (See User Guide for details.)
1742    * @return The length of the UTF-32 string.
1743    * @see fromUTF32
1744    * @stable ICU 4.2
1745    */
1746   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1747 
1748   /* Length operations */
1749 
1750   /**
1751    * Return the length of the UnicodeString object.
1752    * The length is the number of char16_t code units in the UnicodeString.
1753    * If you want the number of code points, please use countChar32().
1754    * @return the length of the UnicodeString object
1755    * @see countChar32
1756    * @stable ICU 2.0
1757    */
1758   inline int32_t length(void) const;
1759 
1760   /**
1761    * Count Unicode code points in the length char16_t code units of the string.
1762    * A code point may occupy either one or two char16_t code units.
1763    * Counting code points involves reading all code units.
1764    *
1765    * This function is basically the inverse of moveIndex32().
1766    *
1767    * @param start the index of the first code unit to check
1768    * @param length the number of char16_t code units to check
1769    * @return the number of code points in the specified code units
1770    * @see length
1771    * @stable ICU 2.0
1772    */
1773   int32_t
1774   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1775 
1776   /**
1777    * Check if the length char16_t code units of the string
1778    * contain more Unicode code points than a certain number.
1779    * This is more efficient than counting all code points in this part of the string
1780    * and comparing that number with a threshold.
1781    * This function may not need to scan the string at all if the length
1782    * falls within a certain range, and
1783    * never needs to count more than 'number+1' code points.
1784    * Logically equivalent to (countChar32(start, length)>number).
1785    * A Unicode code point may occupy either one or two char16_t code units.
1786    *
1787    * @param start the index of the first code unit to check (0 for the entire string)
1788    * @param length the number of char16_t code units to check
1789    *               (use INT32_MAX for the entire string; remember that start/length
1790    *                values are pinned)
1791    * @param number The number of code points in the (sub)string is compared against
1792    *               the 'number' parameter.
1793    * @return Boolean value for whether the string contains more Unicode code points
1794    *         than 'number'. Same as (u_countChar32(s, length)>number).
1795    * @see countChar32
1796    * @see u_strHasMoreChar32Than
1797    * @stable ICU 2.4
1798    */
1799   UBool
1800   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1801 
1802   /**
1803    * Determine if this string is empty.
1804    * @return TRUE if this string contains 0 characters, FALSE otherwise.
1805    * @stable ICU 2.0
1806    */
1807   inline UBool isEmpty(void) const;
1808 
1809   /**
1810    * Return the capacity of the internal buffer of the UnicodeString object.
1811    * This is useful together with the getBuffer functions.
1812    * See there for details.
1813    *
1814    * @return the number of char16_ts available in the internal buffer
1815    * @see getBuffer
1816    * @stable ICU 2.0
1817    */
1818   inline int32_t getCapacity(void) const;
1819 
1820   /* Other operations */
1821 
1822   /**
1823    * Generate a hash code for this object.
1824    * @return The hash code of this UnicodeString.
1825    * @stable ICU 2.0
1826    */
1827   inline int32_t hashCode(void) const;
1828 
1829   /**
1830    * Determine if this object contains a valid string.
1831    * A bogus string has no value. It is different from an empty string,
1832    * although in both cases isEmpty() returns TRUE and length() returns 0.
1833    * setToBogus() and isBogus() can be used to indicate that no string value is available.
1834    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1835    * length() returns 0.
1836    *
1837    * @return TRUE if the string is bogus/invalid, FALSE otherwise
1838    * @see setToBogus()
1839    * @stable ICU 2.0
1840    */
1841   inline UBool isBogus(void) const;
1842 
1843 
1844   //========================================
1845   // Write operations
1846   //========================================
1847 
1848   /* Assignment operations */
1849 
1850   /**
1851    * Assignment operator.  Replace the characters in this UnicodeString
1852    * with the characters from <TT>srcText</TT>.
1853    *
1854    * Starting with ICU 2.4, the assignment operator and the copy constructor
1855    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1856    * By contrast, the fastCopyFrom() function implements the old,
1857    * more efficient but less safe behavior
1858    * of making this string also a readonly alias to the same buffer.
1859    *
1860    * If the source object has an "open" buffer from getBuffer(minCapacity),
1861    * then the copy is an empty string.
1862    *
1863    * @param srcText The text containing the characters to replace
1864    * @return a reference to this
1865    * @stable ICU 2.0
1866    * @see fastCopyFrom
1867    */
1868   UnicodeString &operator=(const UnicodeString &srcText);
1869 
1870   /**
1871    * Almost the same as the assignment operator.
1872    * Replace the characters in this UnicodeString
1873    * with the characters from <code>srcText</code>.
1874    *
1875    * This function works the same as the assignment operator
1876    * for all strings except for ones that are readonly aliases.
1877    *
1878    * Starting with ICU 2.4, the assignment operator and the copy constructor
1879    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1880    * This function implements the old, more efficient but less safe behavior
1881    * of making this string also a readonly alias to the same buffer.
1882    *
1883    * The fastCopyFrom function must be used only if it is known that the lifetime of
1884    * this UnicodeString does not exceed the lifetime of the aliased buffer
1885    * including its contents, for example for strings from resource bundles
1886    * or aliases to string constants.
1887    *
1888    * If the source object has an "open" buffer from getBuffer(minCapacity),
1889    * then the copy is an empty string.
1890    *
1891    * @param src The text containing the characters to replace.
1892    * @return a reference to this
1893    * @stable ICU 2.4
1894    */
1895   UnicodeString &fastCopyFrom(const UnicodeString &src);
1896 
1897   /**
1898    * Move assignment operator (forwards to moveFrom()); might leave src in bogus state.
1899    * This string will have the same contents and state that the source string had.
1900    * The behavior is undefined if *this and src are the same object.
1901    * @param src source string
1902    * @return *this
1903    * @stable ICU 56
1904    */
1905   UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1906     return moveFrom(src);  // src is a named parameter (an lvalue here), so it binds to moveFrom(UnicodeString &) without std::move
1907   }
1908 
1909   // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
1910   /**
1911    * Move assignment; might leave src in bogus state.
1912    * This string will have the same contents and state that the source string had.
1913    * The behavior is undefined if *this and src are the same object.
1914    *
1915    * Can be called explicitly, does not need C++11 support.
1916    * @param src source string
1917    * @return *this
1918    * @draft ICU 56
1919    */
1920   UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1921 
1922   /**
1923    * Swap strings.
1924    * @param other other string
1925    * @stable ICU 56
1926    */
1927   void swap(UnicodeString &other) U_NOEXCEPT;
1928 
1929   /**
1930    * Non-member UnicodeString swap function; calls s1.swap(s2).
1931    * @param s1 will get s2's contents and state
1932    * @param s2 will get s1's contents and state
1933    * @stable ICU 56
1934    */
1935   friend U_COMMON_API inline void U_EXPORT2
swap(UnicodeString & s1,UnicodeString & s2)1936   swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1937     s1.swap(s2);  // delegate to the member swap()
1938   }
1939 
1940   /**
1941    * Assignment operator.  Replace the characters in this UnicodeString
1942    * with the code unit <TT>ch</TT>.
1943    * @param ch the code unit to replace
1944    * @return a reference to this
1945    * @stable ICU 2.0
1946    */
1947   inline UnicodeString& operator= (char16_t ch);
1948 
1949   /**
1950    * Assignment operator.  Replace the characters in this UnicodeString
1951    * with the code point <TT>ch</TT>.
1952    * @param ch the code point to replace
1953    * @return a reference to this
1954    * @stable ICU 2.0
1955    */
1956   inline UnicodeString& operator= (UChar32 ch);
1957 
1958   /**
1959    * Set the text in the UnicodeString object to the characters
1960    * in <TT>srcText</TT> in the range
1961    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1962    * <TT>srcText</TT> is not modified.
1963    * @param srcText the source for the new characters
1964    * @param srcStart the offset into <TT>srcText</TT> where new characters
1965    * will be obtained
1966    * @return a reference to this
1967    * @stable ICU 2.2
1968    */
1969   inline UnicodeString& setTo(const UnicodeString& srcText,
1970                int32_t srcStart);
1971 
1972   /**
1973    * Set the text in the UnicodeString object to the characters
1974    * in <TT>srcText</TT> in the range
1975    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1976    * <TT>srcText</TT> is not modified.
1977    * @param srcText the source for the new characters
1978    * @param srcStart the offset into <TT>srcText</TT> where new characters
1979    * will be obtained
1980    * @param srcLength the number of characters in <TT>srcText</TT> in the
1981    * replace string.
1982    * @return a reference to this
1983    * @stable ICU 2.0
1984    */
1985   inline UnicodeString& setTo(const UnicodeString& srcText,
1986                int32_t srcStart,
1987                int32_t srcLength);
1988 
1989   /**
1990    * Set the text in the UnicodeString object to the characters in
1991    * <TT>srcText</TT>.
1992    * <TT>srcText</TT> is not modified.
1993    * @param srcText the source for the new characters
1994    * @return a reference to this
1995    * @stable ICU 2.0
1996    */
1997   inline UnicodeString& setTo(const UnicodeString& srcText);
1998 
1999   /**
2000    * Set the characters in the UnicodeString object to the characters
2001    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2002    * @param srcChars the source for the new characters
2003    * @param srcLength the number of Unicode characters in srcChars.
2004    * @return a reference to this
2005    * @stable ICU 2.0
2006    */
2007   inline UnicodeString& setTo(const char16_t *srcChars,
2008                int32_t srcLength);
2009 
2010   /**
2011    * Set the characters in the UnicodeString object to the code unit
2012    * <TT>srcChar</TT>.
2013    * @param srcChar the code unit which becomes the UnicodeString's character
2014    * content
2015    * @return a reference to this
2016    * @stable ICU 2.0
2017    */
2018   UnicodeString& setTo(char16_t srcChar);
2019 
2020   /**
2021    * Set the characters in the UnicodeString object to the code point
2022    * <TT>srcChar</TT>.
2023    * @param srcChar the code point which becomes the UnicodeString's character
2024    * content
2025    * @return a reference to this
2026    * @stable ICU 2.0
2027    */
2028   UnicodeString& setTo(UChar32 srcChar);
2029 
2030   /**
2031    * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2032    * The text will be used for the UnicodeString object, but
2033    * it will not be released when the UnicodeString is destroyed.
2034    * This has copy-on-write semantics:
2035    * When the string is modified, then the buffer is first copied into
2036    * newly allocated memory.
2037    * The aliased buffer is never modified.
2038    *
2039    * In an assignment to another UnicodeString, when using the copy constructor
2040    * or the assignment operator, the text will be copied.
2041    * When using fastCopyFrom(), the text will be aliased again,
2042    * so that both strings then alias the same readonly-text.
2043    *
2044    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2045    *                     This must be true if <code>textLength==-1</code>.
2046    * @param text The characters to alias for the UnicodeString.
2047    * @param textLength The number of Unicode characters in <code>text</code> to alias.
2048    *                   If -1, then this function will determine the length
2049    *                   by calling <code>u_strlen()</code>.
2050    * @return a reference to this
2051    * @stable ICU 2.0
2052    */
2053   UnicodeString &setTo(UBool isTerminated,
2054                        ConstChar16Ptr text,
2055                        int32_t textLength);
2056 
2057   /**
2058    * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2059    * The text will be used for the UnicodeString object, but
2060    * it will not be released when the UnicodeString is destroyed.
2061    * This has write-through semantics:
2062    * For as long as the capacity of the buffer is sufficient, write operations
2063    * will directly affect the buffer. When more capacity is necessary, then
2064    * a new buffer will be allocated and the contents copied as with regularly
2065    * constructed strings.
2066    * In an assignment to another UnicodeString, the buffer will be copied.
2067    * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2068    * as the string buffer itself and will in this case not copy the contents.
2069    *
2070    * @param buffer The characters to alias for the UnicodeString.
2071    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2072    * @param buffCapacity The size of <code>buffer</code> in char16_ts.
2073    * @return a reference to this
2074    * @stable ICU 2.0
2075    */
2076   UnicodeString &setTo(char16_t *buffer,
2077                        int32_t buffLength,
2078                        int32_t buffCapacity);
2079 
2080   /**
2081    * Make this UnicodeString object invalid.
2082    * The string will test TRUE with isBogus().
2083    *
2084    * A bogus string has no value. It is different from an empty string.
2085    * It can be used to indicate that no string value is available.
2086    * getBuffer() and getTerminatedBuffer() return NULL, and
2087    * length() returns 0.
2088    *
2089    * This utility function is used throughout the UnicodeString
2090    * implementation to indicate that a UnicodeString operation failed,
2091    * and may be used in other functions,
2092    * especially but not exclusively when such functions do not
2093    * take a UErrorCode for simplicity.
2094    *
2095    * The following methods, and no others, will clear a string object's bogus flag:
2096    * - remove()
2097    * - remove(0, INT32_MAX)
2098    * - truncate(0)
2099    * - operator=() (assignment operator)
2100    * - setTo(...)
2101    *
2102    * The simplest way to turn a bogus string into an empty one
2103    * is to use the remove() function.
2104    * Examples for other functions that are equivalent to "set to empty string":
2105    * \code
2106    * if(s.isBogus()) {
2107    *   s.remove();           // set to an empty string (remove all), or
2108    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2109    *   s.truncate(0);        // set to an empty string (complete truncation), or
2110    *   s=UnicodeString();    // assign an empty string, or
2111    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2112    *   static const char16_t nul=0;
2113    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2114    * }
2115    * \endcode
2116    *
2117    * @see isBogus()
2118    * @stable ICU 2.0
2119    */
2120   void setToBogus();
2121 
2122   /**
2123    * Set the character at the specified offset to the specified character.
2124    * @param offset A valid offset into the text of the character to set
2125    * @param ch The new character
2126    * @return A reference to this
2127    * @stable ICU 2.0
2128    */
2129   UnicodeString& setCharAt(int32_t offset,
2130                char16_t ch);
2131 
2132 
2133   /* Append operations */
2134 
2135   /**
2136    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2137    * object.
2138    * @param ch the code unit to be appended
2139    * @return a reference to this
2140    * @stable ICU 2.0
2141    */
2142  inline  UnicodeString& operator+= (char16_t ch);
2143 
2144   /**
2145    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2146    * object.
2147    * @param ch the code point to be appended
2148    * @return a reference to this
2149    * @stable ICU 2.0
2150    */
2151  inline  UnicodeString& operator+= (UChar32 ch);
2152 
2153   /**
2154    * Append operator. Append the characters in <TT>srcText</TT> to the
2155    * UnicodeString object. <TT>srcText</TT> is not modified.
2156    * @param srcText the source for the new characters
2157    * @return a reference to this
2158    * @stable ICU 2.0
2159    */
2160   inline UnicodeString& operator+= (const UnicodeString& srcText);
2161 
2162   /**
2163    * Append the characters
2164    * in <TT>srcText</TT> in the range
2165    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2166    * UnicodeString object. <TT>srcText</TT>
2167    * is not modified.
2168    * @param srcText the source for the new characters
2169    * @param srcStart the offset into <TT>srcText</TT> where new characters
2170    * will be obtained
2171    * @param srcLength the number of characters in <TT>srcText</TT> in
2172    * the append string
2173    * @return a reference to this
2174    * @stable ICU 2.0
2175    */
2176   inline UnicodeString& append(const UnicodeString& srcText,
2177             int32_t srcStart,
2178             int32_t srcLength);
2179 
2180   /**
2181    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2182    * <TT>srcText</TT> is not modified.
2183    * @param srcText the source for the new characters
2184    * @return a reference to this
2185    * @stable ICU 2.0
2186    */
2187   inline UnicodeString& append(const UnicodeString& srcText);
2188 
2189   /**
2190    * Append the characters in <TT>srcChars</TT> in the range
2191    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2192    * object.
2193    * <TT>srcChars</TT> is not modified.
2194    * @param srcChars the source for the new characters
2195    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2196    * will be obtained
2197    * @param srcLength the number of characters in <TT>srcChars</TT> in
2198    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2199    * @return a reference to this
2200    * @stable ICU 2.0
2201    */
2202   inline UnicodeString& append(const char16_t *srcChars,
2203             int32_t srcStart,
2204             int32_t srcLength);
2205 
2206   /**
2207    * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2208    * <TT>srcChars</TT> is not modified.
2209    * @param srcChars the source for the new characters
2210    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2211    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2212    * @return a reference to this
2213    * @stable ICU 2.0
2214    */
2215   inline UnicodeString& append(ConstChar16Ptr srcChars,
2216             int32_t srcLength);
2217 
2218   /**
2219    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2220    * @param srcChar the code unit to append
2221    * @return a reference to this
2222    * @stable ICU 2.0
2223    */
2224   inline UnicodeString& append(char16_t srcChar);
2225 
2226   /**
2227    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2228    * @param srcChar the code point to append
2229    * @return a reference to this
2230    * @stable ICU 2.0
2231    */
2232   UnicodeString& append(UChar32 srcChar);
2233 
2234 
2235   /* Insert operations */
2236 
2237   /**
2238    * Insert the characters in <TT>srcText</TT> in the range
2239    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2240    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2241    * @param start the offset where the insertion begins
2242    * @param srcText the source for the new characters
2243    * @param srcStart the offset into <TT>srcText</TT> where new characters
2244    * will be obtained
2245    * @param srcLength the number of characters in <TT>srcText</TT> in
2246    * the insert string
2247    * @return a reference to this
2248    * @stable ICU 2.0
2249    */
2250   inline UnicodeString& insert(int32_t start,
2251             const UnicodeString& srcText,
2252             int32_t srcStart,
2253             int32_t srcLength);
2254 
2255   /**
2256    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2257    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2258    * @param start the offset where the insertion begins
2259    * @param srcText the source for the new characters
2260    * @return a reference to this
2261    * @stable ICU 2.0
2262    */
2263   inline UnicodeString& insert(int32_t start,
2264             const UnicodeString& srcText);
2265 
2266   /**
2267    * Insert the characters in <TT>srcChars</TT> in the range
2268    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2269    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2270    * @param start the offset at which the insertion begins
2271    * @param srcChars the source for the new characters
2272    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2273    * will be obtained
2274    * @param srcLength the number of characters in <TT>srcChars</TT>
2275    * in the insert string
2276    * @return a reference to this
2277    * @stable ICU 2.0
2278    */
2279   inline UnicodeString& insert(int32_t start,
2280             const char16_t *srcChars,
2281             int32_t srcStart,
2282             int32_t srcLength);
2283 
2284   /**
2285    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2286    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2287    * @param start the offset where the insertion begins
2288    * @param srcChars the source for the new characters
2289    * @param srcLength the number of Unicode characters in srcChars.
2290    * @return a reference to this
2291    * @stable ICU 2.0
2292    */
2293   inline UnicodeString& insert(int32_t start,
2294             ConstChar16Ptr srcChars,
2295             int32_t srcLength);
2296 
2297   /**
2298    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2299    * offset <TT>start</TT>.
2300    * @param start the offset at which the insertion occurs
2301    * @param srcChar the code unit to insert
2302    * @return a reference to this
2303    * @stable ICU 2.0
2304    */
2305   inline UnicodeString& insert(int32_t start,
2306             char16_t srcChar);
2307 
2308   /**
2309    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2310    * offset <TT>start</TT>.
2311    * @param start the offset at which the insertion occurs
2312    * @param srcChar the code point to insert
2313    * @return a reference to this
2314    * @stable ICU 2.0
2315    */
2316   inline UnicodeString& insert(int32_t start,
2317             UChar32 srcChar);
2318 
2319 
2320   /* Replace operations */
2321 
2322   /**
2323    * Replace the characters in the range
2324    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2325    * <TT>srcText</TT> in the range
2326    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2327    * <TT>srcText</TT> is not modified.
2328    * @param start the offset at which the replace operation begins
2329    * @param length the number of characters to replace. The character at
2330    * <TT>start + length</TT> is not modified.
2331    * @param srcText the source for the new characters
2332    * @param srcStart the offset into <TT>srcText</TT> where new characters
2333    * will be obtained
2334    * @param srcLength the number of characters in <TT>srcText</TT> in
2335    * the replace string
2336    * @return a reference to this
2337    * @stable ICU 2.0
2338    */
2339   UnicodeString& replace(int32_t start,
2340              int32_t length,
2341              const UnicodeString& srcText,
2342              int32_t srcStart,
2343              int32_t srcLength);
2344 
2345   /**
2346    * Replace the characters in the range
2347    * [<TT>start</TT>, <TT>start + length</TT>)
2348    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2349    *  not modified.
2350    * @param start the offset at which the replace operation begins
2351    * @param length the number of characters to replace. The character at
2352    * <TT>start + length</TT> is not modified.
2353    * @param srcText the source for the new characters
2354    * @return a reference to this
2355    * @stable ICU 2.0
2356    */
2357   UnicodeString& replace(int32_t start,
2358              int32_t length,
2359              const UnicodeString& srcText);
2360 
2361   /**
2362    * Replace the characters in the range
2363    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2364    * <TT>srcChars</TT> in the range
2365    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2366    * is not modified.
2367    * @param start the offset at which the replace operation begins
2368    * @param length the number of characters to replace.  The character at
2369    * <TT>start + length</TT> is not modified.
2370    * @param srcChars the source for the new characters
2371    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2372    * will be obtained
2373    * @param srcLength the number of characters in <TT>srcChars</TT>
2374    * in the replace string
2375    * @return a reference to this
2376    * @stable ICU 2.0
2377    */
2378   UnicodeString& replace(int32_t start,
2379              int32_t length,
2380              const char16_t *srcChars,
2381              int32_t srcStart,
2382              int32_t srcLength);
2383 
2384   /**
2385    * Replace the characters in the range
2386    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2387    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2388    * @param start the offset at which the replace operation begins
2389    * @param length number of characters to replace.  The character at
2390    * <TT>start + length</TT> is not modified.
2391    * @param srcChars the source for the new characters
2392    * @param srcLength the number of Unicode characters in srcChars
2393    * @return a reference to this
2394    * @stable ICU 2.0
2395    */
2396   inline UnicodeString& replace(int32_t start,
2397              int32_t length,
2398              ConstChar16Ptr srcChars,
2399              int32_t srcLength);
2400 
2401   /**
2402    * Replace the characters in the range
2403    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2404    * <TT>srcChar</TT>.
2405    * @param start the offset at which the replace operation begins
2406    * @param length the number of characters to replace.  The character at
2407    * <TT>start + length</TT> is not modified.
2408    * @param srcChar the new code unit
2409    * @return a reference to this
2410    * @stable ICU 2.0
2411    */
2412   inline UnicodeString& replace(int32_t start,
2413              int32_t length,
2414              char16_t srcChar);
2415 
2416   /**
2417    * Replace the characters in the range
2418    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2419    * <TT>srcChar</TT>.
2420    * @param start the offset at which the replace operation begins
2421    * @param length the number of characters to replace.  The character at
2422    * <TT>start + length</TT> is not modified.
2423    * @param srcChar the new code point
2424    * @return a reference to this
2425    * @stable ICU 2.0
2426    */
2427   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2428 
2429   /**
2430    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2431    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2432    * @param start the offset at which the replace operation begins
2433    * @param limit the offset immediately following the replace range
2434    * @param srcText the source for the new characters
2435    * @return a reference to this
2436    * @stable ICU 2.0
2437    */
2438   inline UnicodeString& replaceBetween(int32_t start,
2439                 int32_t limit,
2440                 const UnicodeString& srcText);
2441 
2442   /**
2443    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2444    * with the characters in <TT>srcText</TT> in the range
2445    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2446    * @param start the offset at which the replace operation begins
2447    * @param limit the offset immediately following the replace range
2448    * @param srcText the source for the new characters
2449    * @param srcStart the offset into <TT>srcText</TT> where new characters
2450    * will be obtained
2451    * @param srcLimit the offset immediately following the range to copy
2452    * in <TT>srcText</TT>
2453    * @return a reference to this
2454    * @stable ICU 2.0
2455    */
2456   inline UnicodeString& replaceBetween(int32_t start,
2457                 int32_t limit,
2458                 const UnicodeString& srcText,
2459                 int32_t srcStart,
2460                 int32_t srcLimit);
2461 
2462   /**
2463    * Replace a substring of this object with the given text.
2464    * @param start the beginning index, inclusive; <code>0 <= start
2465    * <= limit</code>.
2466    * @param limit the ending index, exclusive; <code>start <= limit
2467    * <= length()</code>.
2468    * @param text the text to replace characters <code>start</code>
2469    * to <code>limit - 1</code>
2470    * @stable ICU 2.0
2471    */
2472   virtual void handleReplaceBetween(int32_t start,
2473                                     int32_t limit,
2474                                     const UnicodeString& text);
2475 
2476   /**
2477    * Replaceable API
2478    * @return TRUE if it has MetaData
2479    * @stable ICU 2.4
2480    */
2481   virtual UBool hasMetaData() const;
2482 
2483   /**
2484    * Copy a substring of this object, retaining attribute (out-of-band)
2485    * information.  This method is used to duplicate or reorder substrings.
2486    * The destination index must not overlap the source range.
2487    *
2488    * @param start the beginning index, inclusive; <code>0 <= start <=
2489    * limit</code>.
2490    * @param limit the ending index, exclusive; <code>start <= limit <=
2491    * length()</code>.
2492    * @param dest the destination index.  The characters from
2493    * <code>start..limit-1</code> will be copied to <code>dest</code>.
2494    * Implementations of this method may assume that <code>dest <= start ||
2495    * dest >= limit</code>.
2496    * @stable ICU 2.0
2497    */
2498   virtual void copy(int32_t start, int32_t limit, int32_t dest);
2499 
2500   /* Search and replace operations */
2501 
2502   /**
2503    * Replace all occurrences of characters in oldText with the characters
2504    * in newText
2505    * @param oldText the text containing the search text
2506    * @param newText the text containing the replacement text
2507    * @return a reference to this
2508    * @stable ICU 2.0
2509    */
2510   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2511                 const UnicodeString& newText);
2512 
2513   /**
2514    * Replace all occurrences of characters in oldText with characters
2515    * in newText
2516    * in the range [<TT>start</TT>, <TT>start + length</TT>).
2517    * @param start the start of the range in which replace will be performed
2518    * @param length the length of the range in which replace will be performed
2519    * @param oldText the text containing the search text
2520    * @param newText the text containing the replacement text
2521    * @return a reference to this
2522    * @stable ICU 2.0
2523    */
2524   inline UnicodeString& findAndReplace(int32_t start,
2525                 int32_t length,
2526                 const UnicodeString& oldText,
2527                 const UnicodeString& newText);
2528 
2529   /**
2530    * Replace all occurrences of characters in oldText in the range
2531    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2532    * in newText in the range
2533    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2534    * in the range [<TT>start</TT>, <TT>start + length</TT>).
2535    * @param start the start of the range in which replace will be performed
2536    * @param length the length of the range in which replace will be performed
2537    * @param oldText the text containing the search text
2538    * @param oldStart the start of the search range in <TT>oldText</TT>
2539    * @param oldLength the length of the search range in <TT>oldText</TT>
2540    * @param newText the text containing the replacement text
2541    * @param newStart the start of the replacement range in <TT>newText</TT>
2542    * @param newLength the length of the replacement range in <TT>newText</TT>
2543    * @return a reference to this
2544    * @stable ICU 2.0
2545    */
2546   UnicodeString& findAndReplace(int32_t start,
2547                 int32_t length,
2548                 const UnicodeString& oldText,
2549                 int32_t oldStart,
2550                 int32_t oldLength,
2551                 const UnicodeString& newText,
2552                 int32_t newStart,
2553                 int32_t newLength);
2554 
2555 
2556   /* Remove operations */
2557 
2558   /**
2559    * Remove all characters from the UnicodeString object.
2560    * @return a reference to this
2561    * @stable ICU 2.0
2562    */
2563   inline UnicodeString& remove(void);
2564 
2565   /**
2566    * Remove the characters in the range
2567    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2568    * @param start the offset of the first character to remove
2569    * @param length the number of characters to remove; defaults to INT32_MAX
2570    * @return a reference to this
2571    * @stable ICU 2.0
2572    */
2573   inline UnicodeString& remove(int32_t start,
2574                                int32_t length = (int32_t)INT32_MAX);
2575 
2576   /**
2577    * Remove the characters in the range
2578    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2579    * @param start the offset of the first character to remove
2580    * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2581    * @return a reference to this
2582    * @stable ICU 2.0
2583    */
2584   inline UnicodeString& removeBetween(int32_t start,
2585                                       int32_t limit = (int32_t)INT32_MAX);
2586 
2587   /**
2588    * Retain only the characters in the range
2589    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2590    * Removes characters before <code>start</code> and at and after <code>limit</code>.
2591    * @param start the offset of the first character to retain
2592    * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
2593    * @return a reference to this
2594    * @stable ICU 4.4
2595    */
2596   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2597 
2598   /* Length operations */
2599 
2600   /**
2601    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2602    * If the length of this UnicodeString is less than targetLength,
2603    * targetLength - length() copies of padChar will be added to the
2604    * beginning of this UnicodeString.
2605    * @param targetLength the desired length of the string
2606    * @param padChar the character to use for padding. Defaults to
2607    * space (U+0020)
2608    * @return TRUE if the text was padded, FALSE otherwise.
2609    * @stable ICU 2.0
2610    */
2611   UBool padLeading(int32_t targetLength,
2612                     char16_t padChar = 0x0020);
2613 
2614   /**
2615    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2616    * If the length of this UnicodeString is less than targetLength,
2617    * targetLength - length() copies of padChar will be added to the
2618    * end of this UnicodeString.
2619    * @param targetLength the desired length of the string
2620    * @param padChar the character to use for padding. Defaults to
2621    * space (U+0020)
2622    * @return TRUE if the text was padded, FALSE otherwise.
2623    * @stable ICU 2.0
2624    */
2625   UBool padTrailing(int32_t targetLength,
2626                      char16_t padChar = 0x0020);
2627 
2628   /**
2629    * Truncate this UnicodeString to the <TT>targetLength</TT>.
2630    * @param targetLength the desired length of this UnicodeString.
2631    * @return TRUE if the text was truncated, FALSE otherwise
2632    * @stable ICU 2.0
2633    */
2634   inline UBool truncate(int32_t targetLength);
2635 
2636   /**
2637    * Trims leading and trailing whitespace from this UnicodeString.
2638    * @return a reference to this
2639    * @stable ICU 2.0
2640    */
2641   UnicodeString& trim(void);
2642 
2643 
2644   /* Miscellaneous operations */
2645 
2646   /**
2647    * Reverse this UnicodeString in place.
2648    * @return a reference to this
2649    * @stable ICU 2.0
2650    */
2651   inline UnicodeString& reverse(void);
2652 
2653   /**
2654    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2655    * this UnicodeString.
2656    * @param start the start of the range to reverse
2657    * @param length the number of characters to reverse
2658    * @return a reference to this
2659    * @stable ICU 2.0
2660    */
2661   inline UnicodeString& reverse(int32_t start,
2662              int32_t length);
2663 
2664   /**
2665    * Convert the characters in this to UPPER CASE following the conventions of
2666    * the default locale.
2667    * @return A reference to this.
2668    * @stable ICU 2.0
2669    */
2670   UnicodeString& toUpper(void);
2671 
2672   /**
2673    * Convert the characters in this to UPPER CASE following the conventions of
2674    * a specific locale.
2675    * @param locale The locale containing the conventions to use.
2676    * @return A reference to this.
2677    * @stable ICU 2.0
2678    */
2679   UnicodeString& toUpper(const Locale& locale);
2680 
2681   /**
2682    * Convert the characters in this to lower case following the conventions of
2683    * the default locale.
2684    * @return A reference to this.
2685    * @stable ICU 2.0
2686    */
2687   UnicodeString& toLower(void);
2688 
2689   /**
2690    * Convert the characters in this to lower case following the conventions of
2691    * a specific locale.
2692    * @param locale The locale containing the conventions to use.
2693    * @return A reference to this.
2694    * @stable ICU 2.0
2695    */
2696   UnicodeString& toLower(const Locale& locale);
2697 
2698 #if !UCONFIG_NO_BREAK_ITERATION
2699 
2700   /**
2701    * Titlecase this string, convenience function using the default locale.
2702    *
2703    * Casing is locale-dependent and context-sensitive.
2704    * Titlecasing uses a break iterator to find the first characters of words
2705    * that are to be titlecased. It titlecases those characters and lowercases
2706    * all others.
2707    *
2708    * The titlecase break iterator can be provided to customize for arbitrary
2709    * styles, using rules and dictionaries beyond the standard iterators.
2710    * It may be more efficient to always provide an iterator to avoid
2711    * opening and closing one for each string.
2712    * The standard titlecase iterator for the root locale implements the
2713    * algorithm of Unicode TR 21.
2714    *
2715    * This function uses only the setText(), first() and next() methods of the
2716    * provided break iterator.
2717    *
2718    * @param titleIter A break iterator to find the first characters of words
2719    *                  that are to be titlecased.
2720    *                  If none is provided (0), then a standard titlecase
2721    *                  break iterator is opened.
2722    *                  Otherwise the provided iterator is set to the string's text.
2723    * @return A reference to this.
2724    * @stable ICU 2.1
2725    */
2726   UnicodeString &toTitle(BreakIterator *titleIter);
2727 
2728   /**
2729    * Titlecase this string.
2730    *
2731    * Casing is locale-dependent and context-sensitive.
2732    * Titlecasing uses a break iterator to find the first characters of words
2733    * that are to be titlecased. It titlecases those characters and lowercases
2734    * all others.
2735    *
2736    * The titlecase break iterator can be provided to customize for arbitrary
2737    * styles, using rules and dictionaries beyond the standard iterators.
2738    * It may be more efficient to always provide an iterator to avoid
2739    * opening and closing one for each string.
2740    * The standard titlecase iterator for the root locale implements the
2741    * algorithm of Unicode TR 21.
2742    *
2743    * This function uses only the setText(), first() and next() methods of the
2744    * provided break iterator.
2745    *
2746    * @param titleIter A break iterator to find the first characters of words
2747    *                  that are to be titlecased.
2748    *                  If none is provided (0), then a standard titlecase
2749    *                  break iterator is opened.
2750    *                  Otherwise the provided iterator is set to the string's text.
2751    * @param locale    The locale to consider.
2752    * @return A reference to this.
2753    * @stable ICU 2.1
2754    */
2755   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2756 
2757   /**
2758    * Titlecase this string, with options.
2759    *
2760    * Casing is locale-dependent and context-sensitive.
2761    * Titlecasing uses a break iterator to find the first characters of words
2762    * that are to be titlecased. It titlecases those characters and lowercases
2763    * all others. (This can be modified with options.)
2764    *
2765    * The titlecase break iterator can be provided to customize for arbitrary
2766    * styles, using rules and dictionaries beyond the standard iterators.
2767    * It may be more efficient to always provide an iterator to avoid
2768    * opening and closing one for each string.
2769    * The standard titlecase iterator for the root locale implements the
2770    * algorithm of Unicode TR 21.
2771    *
2772    * This function uses only the setText(), first() and next() methods of the
2773    * provided break iterator.
2774    *
2775    * @param titleIter A break iterator to find the first characters of words
2776    *                  that are to be titlecased.
2777    *                  If none is provided (0), then a standard titlecase
2778    *                  break iterator is opened.
2779    *                  Otherwise the provided iterator is set to the string's text.
2780    * @param locale    The locale to consider.
2781    * @param options   Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
2782    *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
2783    *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
2784    *                  See also ucasemap_open().
2785    * @return A reference to this.
2786    * @stable ICU 3.8
2787    */
2788   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2789 
2790 #endif
2791 
2792   /**
2793    * Case-folds the characters in this string.
2794    *
2795    * Case-folding is locale-independent and not context-sensitive,
2796    * but there is an option for whether to include or exclude mappings for dotted I
2797    * and dotless i that are marked with 'T' in CaseFolding.txt.
2798    *
2799    * The result may be longer or shorter than the original.
2800    *
2801    * @param options Either U_FOLD_CASE_DEFAULT (the default argument, 0) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2802    * @return A reference to this.
2803    * @stable ICU 2.0
2804    */
2805   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2806 
2807   //========================================
2808   // Access to the internal buffer
2809   //========================================
2810 
2811   /**
2812    * Get a read/write pointer to the internal buffer.
2813    * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2814    * writable, and is still owned by the UnicodeString object.
2815    * Calls to getBuffer(minCapacity) must not be nested, and
2816    * must be matched with calls to releaseBuffer(newLength).
2817    * If the string buffer was read-only or shared,
2818    * then it will be reallocated and copied.
2819    *
2820    * An attempted nested call will return 0, and will not further modify the
2821    * state of the UnicodeString object.
2822    * It also returns 0 if the string is bogus.
2823    *
2824    * The actual capacity of the string buffer may be larger than minCapacity.
2825    * getCapacity() returns the actual capacity.
2826    * For many operations, the full capacity should be used to avoid reallocations.
2827    *
2828    * While the buffer is "open" between getBuffer(minCapacity)
2829    * and releaseBuffer(newLength), the following applies:
2830    * - The string length is set to 0.
2831    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2832    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2833    * - You can read from and write to the returned buffer.
2834    * - The previous string contents will still be in the buffer;
2835    *   if you want to use them, then you need to call length() before getBuffer(minCapacity).
2836    *   If the length() was greater than minCapacity, then any contents after minCapacity
2837    *   may be lost.
2838    *   The buffer contents are not NUL-terminated by getBuffer().
2839    *   If length()<getCapacity() then you can terminate it by writing a NUL
2840    *   at index length().
2841    * - You must call releaseBuffer(newLength) in order to
2842    *   return to normal UnicodeString operation.
2843    *
2844    * @param minCapacity the minimum number of char16_ts that are to be available
2845    *        in the buffer, starting at the returned pointer;
2846    *        defaults to the current string capacity if minCapacity==-1
2847    * @return a writable pointer to the internal string buffer,
2848    *         or nullptr if an error occurs (nested calls, bogus string, out of memory)
2849    *
2850    * @see releaseBuffer
2851    * @see getTerminatedBuffer()
2852    * @stable ICU 2.0
2853    */
2854   char16_t *getBuffer(int32_t minCapacity);
2855 
2856   /**
2857    * Release a read/write buffer on a UnicodeString object with an
2858    * "open" getBuffer(minCapacity).
2859    * This function must be called in a matched pair with getBuffer(minCapacity).
2860    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2861    *
2862    * It will set the string length to newLength, at most to the current capacity.
2863    * If newLength==-1 then it will set the length according to the
2864    * first NUL in the buffer, or to the capacity if there is no NUL.
2865    *
2866    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2867    *
2868    * @param newLength the new length of the UnicodeString object;
2869    *        defaults to the current capacity if newLength is greater than that;
2870    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2871    *        the current capacity of the string
2872    *
2873    * @see getBuffer(int32_t minCapacity)
2874    * @stable ICU 2.0
2875    */
2876   void releaseBuffer(int32_t newLength=-1);
2877 
2878   /**
2879    * Get a read-only pointer to the internal buffer.
2880    * This can be called at any time on a valid UnicodeString.
2881    *
2882    * It returns nullptr if the string is bogus, or
2883    * during an "open" getBuffer(minCapacity).
2884    *
2885    * It can be called as many times as desired.
2886    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2887    * at which time the pointer is semantically invalidated and must not be used any more.
2888    *
2889    * The capacity of the buffer can be determined with getCapacity().
2890    * The part after length() may or may not be initialized and valid,
2891    * depending on the history of the UnicodeString object.
2892    *
2893    * The buffer contents is (probably) not NUL-terminated.
2894    * You can check if it is with
2895    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2896    * (See getTerminatedBuffer().)
2897    *
2898    * The buffer may reside in read-only memory. Its contents must not
2899    * be modified.
2900    *
2901    * @return a read-only pointer to the internal string buffer,
2902    *         or nullptr if the string is bogus or has an "open" getBuffer(minCapacity)
2903    *
2904    * @see getBuffer(int32_t minCapacity)
2905    * @see getTerminatedBuffer()
2906    * @stable ICU 2.0
2907    */
2908   inline const char16_t *getBuffer() const;
2909 
2910   /**
2911    * Get a read-only pointer to the internal buffer,
2912    * making sure that it is NUL-terminated.
2913    * This can be called at any time on a valid UnicodeString.
2914    *
2915    * It returns nullptr if the string is bogus, or
2916    * during an "open" getBuffer(minCapacity), or if the buffer cannot
2917    * be NUL-terminated (because memory allocation failed).
2918    *
2919    * It can be called as many times as desired.
2920    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2921    * at which time the pointer is semantically invalidated and must not be used any more.
2922    *
2923    * The capacity of the buffer can be determined with getCapacity().
2924    * The part after length()+1 may or may not be initialized and valid,
2925    * depending on the history of the UnicodeString object.
2926    *
2927    * The buffer contents is guaranteed to be NUL-terminated.
2928    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2929    * is written.
2930    * For this reason, this function is not const, unlike getBuffer().
2931    * Note that a UnicodeString may also contain NUL characters as part of its contents.
2932    *
2933    * The buffer may reside in read-only memory. Its contents must not
2934    * be modified.
2935    *
2936    * @return a read-only pointer to the NUL-terminated internal string buffer,
2937    *         or nullptr if bogus, during an "open" getBuffer(minCapacity), or on allocation failure
2938    *
2939    * @see getBuffer(int32_t minCapacity)
2940    * @see getBuffer()
2941    * @stable ICU 2.2
2942    */
2943   const char16_t *getTerminatedBuffer();
2944 
2945   //========================================
2946   // Constructors
2947   //========================================
2948 
2949   /** Construct an empty UnicodeString.
2950    * @stable ICU 2.0
2951    */
2952   inline UnicodeString();
2953 
2954   /**
2955    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts
2956    * @param capacity the number of char16_ts this UnicodeString should hold
2957    * before a resize is necessary; if count is greater than 0 and count
2958    * code points c take up more space than capacity, then capacity is adjusted
2959    * accordingly.
2960    * @param c is used to initially fill the string
2961    * @param count specifies how many code points c are to be written in the
2962    *              string
2963    * @stable ICU 2.0
2964    */
2965   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2966 
2967   /**
2968    * Single char16_t (code unit) constructor.
2969    *
2970    * It is recommended to mark this constructor "explicit" by
2971    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2972    * on the compiler command line or similar.
2973    * @param ch the character to place in the UnicodeString
2974    * @stable ICU 2.0
2975    */
2976   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2977 
2978   /**
2979    * Single UChar32 (code point) constructor.
2980    *
2981    * It is recommended to mark this constructor "explicit" by
2982    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2983    * on the compiler command line or similar.
2984    * @param ch the character to place in the UnicodeString
2985    * @stable ICU 2.0
2986    */
2987   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2988 
2989   /**
2990    * char16_t* constructor.
2991    *
2992    * It is recommended to mark this constructor "explicit" by
2993    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2994    * on the compiler command line or similar.
2995    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2996    * must be NUL (U+0000) terminated.
2997    * @stable ICU 2.0
2998    */
2999   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
3000 
3001 #if !U_CHAR16_IS_TYPEDEF
3002   /**
3003    * uint16_t * constructor.
3004    * Delegates to UnicodeString(const char16_t *).
3005    *
3006    * It is recommended to mark this constructor "explicit" by
3007    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3008    * on the compiler command line or similar.
3009    * @param text NUL-terminated UTF-16 string
3010    * @stable ICU 59
3011    */
3012   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3013       UnicodeString(ConstChar16Ptr(text)) {}
3014 #endif
3015 
3016 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3017   /**
3018    * wchar_t * constructor.
3019    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3020    * Delegates to UnicodeString(const char16_t *).
3021    *
3022    * It is recommended to mark this constructor "explicit" by
3023    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3024    * on the compiler command line or similar.
3025    * @param text NUL-terminated UTF-16 string
3026    * @stable ICU 59
3027    */
3028   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3029       UnicodeString(ConstChar16Ptr(text)) {}
3030 #endif
3031 
3032   /**
3033    * nullptr_t constructor.
3034    * Effectively the same as the default constructor, makes an empty string object.
3035    *
3036    * It is recommended to mark this constructor "explicit" by
3037    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3038    * on the compiler command line or similar.
3039    * @param text nullptr
3040    * @stable ICU 59
3041    */
3042   UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3043 
3044   /**
3045    * char16_t* constructor.
3046    * @param text The characters to place in the UnicodeString.
3047    * @param textLength The number of Unicode characters in <TT>text</TT>
3048    * to copy.
3049    * @stable ICU 2.0
3050    */
3051   UnicodeString(const char16_t *text,
3052         int32_t textLength);
3053 
3054 #if !U_CHAR16_IS_TYPEDEF
3055   /**
3056    * uint16_t * constructor.
3057    * Delegates to UnicodeString(const char16_t *, int32_t).
3058    * @param text UTF-16 string
3059    * @param length string length
3060    * @stable ICU 59
3061    */
3062   UnicodeString(const uint16_t *text, int32_t length) :
3063       UnicodeString(ConstChar16Ptr(text), length) {}
3064 #endif
3065 
3066 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3067   /**
3068    * wchar_t * constructor.
3069    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3070    * Delegates to UnicodeString(const char16_t *, int32_t).
3071    * @param text UTF-16 string
3072    * @param length string length
3073    * @stable ICU 59
3074    */
3075   UnicodeString(const wchar_t *text, int32_t length) :
3076       UnicodeString(ConstChar16Ptr(text), length) {}
3077 #endif
3078 
3079   /**
3080    * nullptr_t constructor.
3081    * Effectively the same as the default constructor, makes an empty string object.
3082    * @param text nullptr
3083    * @param length ignored
3084    * @stable ICU 59
3085    */
3086   inline UnicodeString(const std::nullptr_t text, int32_t length);
3087 
3088   /**
3089    * Readonly-aliasing char16_t* constructor.
3090    * The text will be used for the UnicodeString object, but
3091    * it will not be released when the UnicodeString is destroyed.
3092    * This has copy-on-write semantics:
3093    * When the string is modified, then the buffer is first copied into
3094    * newly allocated memory.
3095    * The aliased buffer is never modified.
3096    *
3097    * In an assignment to another UnicodeString, when using the copy constructor
3098    * or the assignment operator, the text will be copied.
3099    * When using fastCopyFrom(), the text will be aliased again,
3100    * so that both strings then alias the same readonly-text.
3101    *
3102    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
3103    *                     This must be true if <code>textLength==-1</code>.
3104    * @param text The characters to alias for the UnicodeString.
3105    * @param textLength The number of Unicode characters in <code>text</code> to alias.
3106    *                   If -1, then this constructor will determine the length
3107    *                   by calling <code>u_strlen()</code>.
3108    * @stable ICU 2.0
3109    */
3110   UnicodeString(UBool isTerminated,
3111                 ConstChar16Ptr text,
3112                 int32_t textLength);
3113 
3114   /**
3115    * Writable-aliasing char16_t* constructor.
3116    * The text will be used for the UnicodeString object, but
3117    * it will not be released when the UnicodeString is destroyed.
3118    * This has write-through semantics:
3119    * For as long as the capacity of the buffer is sufficient, write operations
3120    * will directly affect the buffer. When more capacity is necessary, then
3121    * a new buffer will be allocated and the contents copied as with regularly
3122    * constructed strings.
3123    * In an assignment to another UnicodeString, the buffer will be copied.
3124    * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3125    * as the string buffer itself and will in this case not copy the contents.
3126    *
3127    * @param buffer The characters to alias for the UnicodeString.
3128    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
3129    * @param buffCapacity The size of <code>buffer</code> in char16_ts.
3130    * @stable ICU 2.0
3131    */
3132   UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3133 
3134 #if !U_CHAR16_IS_TYPEDEF
3135   /**
3136    * Writable-aliasing uint16_t * constructor.
3137    * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3138    * @param buffer writable buffer of/for UTF-16 text
3139    * @param buffLength length of the current buffer contents
3140    * @param buffCapacity buffer capacity
3141    * @stable ICU 59
3142    */
3143   UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3144       UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3145 #endif
3146 
3147 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3148   /**
3149    * Writable-aliasing wchar_t * constructor.
3150    * (Only defined if U_SIZEOF_WCHAR_T==2.)
3151    * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3152    * @param buffer writable buffer of/for UTF-16 text
3153    * @param buffLength length of the current buffer contents
3154    * @param buffCapacity buffer capacity
3155    * @stable ICU 59
3156    */
3157   UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3158       UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3159 #endif
3160 
3161   /**
3162    * Writable-aliasing nullptr_t constructor.
3163    * Effectively the same as the default constructor, makes an empty string object.
3164    * @param buffer nullptr
3165    * @param buffLength ignored
3166    * @param buffCapacity ignored
3167    * @stable ICU 59
3168    */
3169   inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3170 
3171 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3172 
3173   /**
3174    * char* constructor.
3175    * Uses the default converter (and thus depends on the ICU conversion code)
3176    * unless U_CHARSET_IS_UTF8 is set to 1.
3177    *
3178    * For ASCII (really "invariant character") strings it is more efficient to use
3179    * the constructor that takes a US_INV (for its enum EInvariant).
3180    * For ASCII (invariant-character) string literals, see UNICODE_STRING and
3181    * UNICODE_STRING_SIMPLE.
3182    *
3183    * It is recommended to mark this constructor "explicit" by
3184    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3185    * on the compiler command line or similar.
3186    * @param codepageData an array of bytes, null-terminated,
3187    *                     in the platform's default codepage.
3188    * @stable ICU 2.0
3189    * @see UNICODE_STRING
3190    * @see UNICODE_STRING_SIMPLE
3191    */
3192   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3193 
3194   /**
3195    * char* constructor.
3196    * Uses the default converter (and thus depends on the ICU conversion code)
3197    * unless U_CHARSET_IS_UTF8 is set to 1.
3198    * @param codepageData an array of bytes in the platform's default codepage.
3199    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3200    * @stable ICU 2.0
3201    */
3202   UnicodeString(const char *codepageData, int32_t dataLength);
3203 
3204 #endif
3205 
3206 #if !UCONFIG_NO_CONVERSION
3207 
3208   /**
3209    * char* constructor.
3210    * @param codepageData an array of bytes, null-terminated
3211    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3212    * value 0 for <TT>codepage</TT> indicates that the text is in the
3213    * platform's default codepage.
3214    *
3215    * If <code>codepage</code> is an empty string (<code>""</code>),
3216    * then a simple conversion is performed on the codepage-invariant
3217    * subset ("invariant characters") of the platform encoding. See utypes.h.
3218    * Recommendation: For invariant-character strings use the constructor
3219    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3220    * because it avoids object code dependencies of UnicodeString on
3221    * the conversion code.
3222    *
3223    * @stable ICU 2.0
3224    */
3225   UnicodeString(const char *codepageData, const char *codepage);
3226 
3227   /**
3228    * char* constructor.
3229    * @param codepageData an array of bytes.
3230    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3231    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3232    * value 0 for <TT>codepage</TT> indicates that the text is in the
3233    * platform's default codepage.
3234    * If <code>codepage</code> is an empty string (<code>""</code>),
3235    * then a simple conversion is performed on the codepage-invariant
3236    * subset ("invariant characters") of the platform encoding. See utypes.h.
3237    * Recommendation: For invariant-character strings use the constructor
3238    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3239    * because it avoids object code dependencies of UnicodeString on
3240    * the conversion code.
3241    *
3242    * @stable ICU 2.0
3243    */
3244   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3245 
3246   /**
3247    * char * / UConverter constructor.
3248    * This constructor uses an existing UConverter object to
3249    * convert the codepage string to Unicode and construct a UnicodeString
3250    * from that.
3251    *
3252    * The converter is reset at first.
3253    * If the error code indicates a failure before this constructor is called,
3254    * or if an error occurs during conversion or construction,
3255    * then the string will be bogus.
3256    *
3257    * This function avoids the overhead of opening and closing a converter if
3258    * multiple strings are constructed.
3259    *
3260    * @param src input codepage string
3261    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3262    * @param cnv converter object (ucnv_resetToUnicode() will be called),
3263    *        can be NULL for the default converter
3264    * @param errorCode normal ICU error code
3265    * @stable ICU 2.0
3266    */
3267   UnicodeString(
3268         const char *src, int32_t srcLength,
3269         UConverter *cnv,
3270         UErrorCode &errorCode);
3271 
3272 #endif
3273 
3274   /**
3275    * Constructs a Unicode string from an invariant-character char * string.
3276    * About invariant characters see utypes.h.
3277    * This constructor has no runtime dependency on conversion code and is
3278    * therefore recommended over ones taking a charset name string
3279    * (where the empty string "" indicates invariant-character conversion).
3280    *
3281    * Use the macro US_INV as the third, signature-distinguishing parameter.
3282    *
3283    * For example:
3284    * \code
3285    * void fn(const char *s) {
3286    *   UnicodeString ustr(s, -1, US_INV);
3287    *   // use ustr ...
3288    * }
3289    * \endcode
3290    *
3291    * @param src String using only invariant characters.
3292    * @param length Length of src, or -1 if NUL-terminated.
3293    * @param inv Signature-distinguishing parameter, use US_INV.
3294    *
3295    * @see US_INV
3296    * @stable ICU 3.2
3297    */
3298   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3299 
3300 
3301   /**
3302    * Copy constructor.
3303    *
3304    * Starting with ICU 2.4, the assignment operator and the copy constructor
3305    * allocate a new buffer and copy the buffer contents even for readonly aliases.
3306    * By contrast, the fastCopyFrom() function implements the old,
3307    * more efficient but less safe behavior
3308    * of making this string also a readonly alias to the same buffer.
3309    *
3310    * If the source object has an "open" buffer from getBuffer(minCapacity),
3311    * then the copy is an empty string.
3312    *
3313    * @param that The UnicodeString object to copy.
3314    * @stable ICU 2.0
3315    * @see fastCopyFrom
3316    */
3317   UnicodeString(const UnicodeString& that);
3318 
3319   /**
3320    * Move constructor; might leave src in bogus state.
3321    * This string will have the same contents and state that the source string had.
3322    * @param src source string
3323    * @stable ICU 56
3324    */
3325   UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3326 
3327   /**
3328    * 'Substring' constructor from tail of source string.
3329    * @param src The UnicodeString object to copy.
3330    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3331    * @stable ICU 2.2
3332    */
3333   UnicodeString(const UnicodeString& src, int32_t srcStart);
3334 
3335   /**
3336    * 'Substring' constructor from subrange of source string.
3337    * @param src The UnicodeString object to copy.
3338    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3339    * @param srcLength The number of characters from <tt>src</tt> to copy.
3340    * @stable ICU 2.2
3341    */
3342   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3343 
3344   /**
3345    * Clone this object, an instance of a subclass of Replaceable.
3346    * Clones can be used concurrently in multiple threads.
3347    * If a subclass does not implement clone(), or if an error occurs,
3348    * then NULL is returned.
3349    * The clone functions in all subclasses return a pointer to a Replaceable
3350    * because some compilers do not support covariant (same-as-this)
3351    * return types; cast to the appropriate subclass if necessary.
3352    * The caller must delete the clone.
3353    *
3354    * @return a clone of this object
3355    *
3356    * @see Replaceable::clone
3357    * @see getDynamicClassID
3358    * @stable ICU 2.6
3359    */
3360   virtual Replaceable *clone() const;
3361 
3362   /** Destructor.
3363    * @stable ICU 2.0
3364    */
3365   virtual ~UnicodeString();
3366 
3367   /**
3368    * Create a UnicodeString from a UTF-8 string.
3369    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3370    * Calls u_strFromUTF8WithSub().
3371    *
3372    * @param utf8 UTF-8 input string.
3373    *             Note that a StringPiece can be implicitly constructed
3374    *             from a std::string or a NUL-terminated const char * string.
3375    * @return A UnicodeString with equivalent UTF-16 contents.
3376    * @see toUTF8
3377    * @see toUTF8String
3378    * @stable ICU 4.2
3379    */
3380   static UnicodeString fromUTF8(StringPiece utf8);
3381 
3382   /**
3383    * Create a UnicodeString from a UTF-32 string.
3384    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3385    * Calls u_strFromUTF32WithSub().
3386    *
3387    * @param utf32 UTF-32 input string. Must not be NULL.
3388    * @param length Length of the input string, or -1 if NUL-terminated.
3389    * @return A UnicodeString with equivalent UTF-16 contents.
3390    * @see toUTF32
3391    * @stable ICU 4.2
3392    */
3393   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3394 
3395   /* Miscellaneous operations */
3396 
3397   /**
3398    * Unescape a string of characters and return a string containing
3399    * the result.  The following escape sequences are recognized:
3400    *
3401    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3402    * \\Uhhhhhhhh   8 hex digits
3403    * \\xhh         1-2 hex digits
3404    * \\ooo         1-3 octal digits; o in [0-7]
3405    * \\cX          control-X; X is masked with 0x1F
3406    *
3407    * as well as the standard ANSI C escapes:
3408    *
3409    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3410    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3411    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3412    *
3413    * Anything else following a backslash is generically escaped.  For
3414    * example, "[a\\-z]" returns "[a-z]".
3415    *
3416    * If an escape sequence is ill-formed, this method returns an empty
3417    * string.  An example of an ill-formed sequence is "\\u" followed by
3418    * fewer than 4 hex digits.
3419    *
3420    * This function is similar to u_unescape() but not identical to it.
3421    * The latter takes a source char*, so it does escape recognition
3422    * and also invariant conversion.
3423    *
3424    * @return a string with backslash escapes interpreted, or an
3425    * empty string on error.
3426    * @see UnicodeString#unescapeAt()
3427    * @see u_unescape()
3428    * @see u_unescapeAt()
3429    * @stable ICU 2.0
3430    */
3431   UnicodeString unescape() const;
3432 
3433   /**
3434    * Unescape a single escape sequence and return the represented
3435    * character.  See unescape() for a listing of the recognized escape
3436    * sequences.  The character at offset-1 is assumed (without
3437    * checking) to be a backslash.  If the escape sequence is
3438    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3439    * returned.
3440    *
3441    * @param offset an input output parameter.  On input, it is the
3442    * offset into this string where the escape sequence is located,
3443    * after the initial backslash.  On output, it is advanced after the
3444    * last character parsed.  On error, it is not advanced at all.
3445    * @return the character represented by the escape sequence at
3446    * offset, or U_SENTINEL=-1 on error.
3447    * @see UnicodeString#unescape()
3448    * @see u_unescape()
3449    * @see u_unescapeAt()
3450    * @stable ICU 2.0
3451    */
3452   UChar32 unescapeAt(int32_t &offset) const;
3453 
3454   /**
3455    * ICU "poor man's RTTI", returns a UClassID for this class.
3456    *
3457    * @stable ICU 2.2
3458    */
3459   static UClassID U_EXPORT2 getStaticClassID();
3460 
3461   /**
3462    * ICU "poor man's RTTI", returns a UClassID for the actual class.
3463    *
3464    * @stable ICU 2.2
3465    */
3466   virtual UClassID getDynamicClassID() const;
3467 
3468   //========================================
3469   // Implementation methods
3470   //========================================
3471 
3472 protected:
3473   /**
3474    * Implement Replaceable::getLength() (see jitterbug 1027).
3475    * @stable ICU 2.4
3476    */
3477   virtual int32_t getLength() const;
3478 
3479   /**
3480    * The change in Replaceable to use virtual getCharAt() allows
3481    * UnicodeString::charAt() to be inline again (see jitterbug 709).
3482    * @stable ICU 2.4
3483    */
3484   virtual char16_t getCharAt(int32_t offset) const;
3485 
3486   /**
3487    * The change in Replaceable to use virtual getChar32At() allows
3488    * UnicodeString::char32At() to be inline again (see jitterbug 709).
3489    * @stable ICU 2.4
3490    */
3491   virtual UChar32 getChar32At(int32_t offset) const;
3492 
3493 private:
3494   // For char* constructors. Could be made public.
3495   UnicodeString &setToUTF8(StringPiece utf8);
3496   // For extract(char*).
3497   // We could make a toUTF8(target, capacity, errorCode) public but not
3498   // this version: New API will be cleaner if we make callers create substrings
3499   // rather than having start+length on every method,
3500   // and it should take a UErrorCode&.
3501   int32_t
3502   toUTF8(int32_t start, int32_t len,
3503          char *target, int32_t capacity) const;
3504 
3505   /**
3506    * Internal string contents comparison, called by operator==.
3507    * Requires: this & text not bogus and have same lengths.
3508    */
3509   UBool doEquals(const UnicodeString &text, int32_t len) const;
3510 
3511   inline int8_t
3512   doCompare(int32_t start,
3513            int32_t length,
3514            const UnicodeString& srcText,
3515            int32_t srcStart,
3516            int32_t srcLength) const;
3517 
3518   int8_t doCompare(int32_t start,
3519            int32_t length,
3520            const char16_t *srcChars,
3521            int32_t srcStart,
3522            int32_t srcLength) const;
3523 
3524   inline int8_t
3525   doCompareCodePointOrder(int32_t start,
3526                           int32_t length,
3527                           const UnicodeString& srcText,
3528                           int32_t srcStart,
3529                           int32_t srcLength) const;
3530 
3531   int8_t doCompareCodePointOrder(int32_t start,
3532                                  int32_t length,
3533                                  const char16_t *srcChars,
3534                                  int32_t srcStart,
3535                                  int32_t srcLength) const;
3536 
3537   inline int8_t
3538   doCaseCompare(int32_t start,
3539                 int32_t length,
3540                 const UnicodeString &srcText,
3541                 int32_t srcStart,
3542                 int32_t srcLength,
3543                 uint32_t options) const;
3544 
3545   int8_t
3546   doCaseCompare(int32_t start,
3547                 int32_t length,
3548                 const char16_t *srcChars,
3549                 int32_t srcStart,
3550                 int32_t srcLength,
3551                 uint32_t options) const;
3552 
3553   int32_t doIndexOf(char16_t c,
3554             int32_t start,
3555             int32_t length) const;
3556 
3557   int32_t doIndexOf(UChar32 c,
3558                         int32_t start,
3559                         int32_t length) const;
3560 
3561   int32_t doLastIndexOf(char16_t c,
3562                 int32_t start,
3563                 int32_t length) const;
3564 
3565   int32_t doLastIndexOf(UChar32 c,
3566                             int32_t start,
3567                             int32_t length) const;
3568 
3569   void doExtract(int32_t start,
3570          int32_t length,
3571          char16_t *dst,
3572          int32_t dstStart) const;
3573 
3574   inline void doExtract(int32_t start,
3575          int32_t length,
3576          UnicodeString& target) const;
3577 
3578   inline char16_t doCharAt(int32_t offset)  const;
3579 
3580   UnicodeString& doReplace(int32_t start,
3581                int32_t length,
3582                const UnicodeString& srcText,
3583                int32_t srcStart,
3584                int32_t srcLength);
3585 
3586   UnicodeString& doReplace(int32_t start,
3587                int32_t length,
3588                const char16_t *srcChars,
3589                int32_t srcStart,
3590                int32_t srcLength);
3591 
3592   UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3593   UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3594 
3595   UnicodeString& doReverse(int32_t start,
3596                int32_t length);
3597 
3598   // calculate hash code
3599   int32_t doHashCode(void) const;
3600 
3601   // get pointer to start of array
3602   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3603   inline char16_t* getArrayStart(void);
3604   inline const char16_t* getArrayStart(void) const;
3605 
3606   inline UBool hasShortLength() const;
3607   inline int32_t getShortLength() const;
3608 
3609   // A UnicodeString object (not necessarily its current buffer)
3610   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3611   inline UBool isWritable() const;
3612 
3613   // Is the current buffer writable?
3614   inline UBool isBufferWritable() const;
3615 
3616   // None of the following does releaseArray().
3617   inline void setZeroLength();
3618   inline void setShortLength(int32_t len);
3619   inline void setLength(int32_t len);
3620   inline void setToEmpty();
3621   inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3622 
3623   // allocate the array; result may be the stack buffer
3624   // sets refCount to 1 if appropriate
3625   // sets fArray, fCapacity, and flags
3626   // sets length to 0
3627   // returns boolean for success or failure
3628   UBool allocate(int32_t capacity);
3629 
3630   // release the array if owned
3631   void releaseArray(void);
3632 
3633   // turn a bogus string into an empty one
3634   void unBogus();
3635 
3636   // implements assignment operator, copy constructor, and fastCopyFrom()
3637   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3638 
3639   // Copies just the fields without memory management.
3640   void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3641 
3642   // Pin start and limit to acceptable values.
3643   inline void pinIndex(int32_t& start) const;
3644   inline void pinIndices(int32_t& start,
3645                          int32_t& length) const;
3646 
3647 #if !UCONFIG_NO_CONVERSION
3648 
3649   /* Internal extract() using UConverter. */
3650   int32_t doExtract(int32_t start, int32_t length,
3651                     char *dest, int32_t destCapacity,
3652                     UConverter *cnv,
3653                     UErrorCode &errorCode) const;
3654 
3655   /*
3656    * Real constructor for converting from codepage data.
3657    * It assumes that it is called with !fRefCounted.
3658    *
3659    * If <code>codepage==0</code>, then the default converter
3660    * is used for the platform encoding.
3661    * If <code>codepage</code> is an empty string (<code>""</code>),
3662    * then a simple conversion is performed on the codepage-invariant
3663    * subset ("invariant characters") of the platform encoding. See utypes.h.
3664    */
3665   void doCodepageCreate(const char *codepageData,
3666                         int32_t dataLength,
3667                         const char *codepage);
3668 
3669   /*
3670    * Worker function for creating a UnicodeString from
3671    * a codepage string using a UConverter.
3672    */
3673   void
3674   doCodepageCreate(const char *codepageData,
3675                    int32_t dataLength,
3676                    UConverter *converter,
3677                    UErrorCode &status);
3678 
3679 #endif
3680 
3681   /*
3682    * This function is called when write access to the array
3683    * is necessary.
3684    *
3685    * We need to make a copy of the array if
3686    * the buffer is read-only, or
3687    * the buffer is refCounted (shared), and refCount>1, or
3688    * the buffer is too small.
3689    *
3690    * Return FALSE if memory could not be allocated.
3691    */
3692   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3693                             int32_t growCapacity = -1,
3694                             UBool doCopyArray = TRUE,
3695                             int32_t **pBufferToDelete = 0,
3696                             UBool forceClone = FALSE);
3697 
3698   /**
3699    * Common function for UnicodeString case mappings.
3700    * The stringCaseMapper has the same type UStringCaseMapper
3701    * as in ustr_imp.h for ustrcase_map().
3702    */
3703   UnicodeString &
3704   caseMap(int32_t caseLocale, uint32_t options,
3705 #if !UCONFIG_NO_BREAK_ITERATION
3706           BreakIterator *iter,
3707 #endif
3708           UStringCaseMapper *stringCaseMapper);
3709 
3710   // ref counting
3711   void addRef(void);
3712   int32_t removeRef(void);
3713   int32_t refCount(void) const;
3714 
3715   // constants: stack buffer size, sentinel values, and the bit layout of fLengthAndFlags
3716   enum {
3717     /**
3718      * Size of stack buffer for short strings.
3719      * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3720      * @see UNISTR_OBJECT_SIZE
3721      */
3722     US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
3723     kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3724     kInvalidHashCode=0, // invalid hash code
3725     kEmptyHashCode=1, // hash code for empty string
3726 
3727     // bit flag values for fLengthAndFlags (the low kLengthShift bits of that field)
3728     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
3729     kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3730     kRefCounted=4,      // there is a refCount field before the characters in fArray
3731     kBufferIsReadonly=8,// do not write to this buffer
3732     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
3733                         // and releaseBuffer(newLength) must be called
3734     kAllStorageFlags=0x1f, // mask covering all five flag bits above
3735 
3736     kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
3737     kLength1=1<<kLengthShift, // a short length of 1 as stored in fLengthAndFlags
3738     kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
3739     kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength
3740 
3741     // combined values for convenience
3742     kShortString=kUsingStackBuffer, // contents live in the stack buffer
3743     kLongString=kRefCounted, // contents live in a ref-counted fArray
3744     kReadonlyAlias=kBufferIsReadonly, // buffer must not be written to
3745     kWritableAlias=0 // none of the storage flags set
3746   };
3747 
3748   friend class UnicodeStringAppendable;
3749 
3750   union StackBufferOrFields;        // forward declaration necessary before friend declaration
3751   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3752 
3753   /*
3754    * The following are all the class fields that are stored
3755    * in each UnicodeString object.
3756    * Note that UnicodeString has virtual functions,
3757    * therefore there is an implicit vtable pointer
3758    * as the first real field.
3759    * The fields should be aligned such that no padding is necessary.
3760    * On 32-bit machines, the size should be 32 bytes,
3761    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3762    *
3763    * We use a hack to achieve this.
3764    *
3765    * With at least some compilers, each of the following is forced to
3766    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3767    * rounded up with additional padding if the fields do not already fit that requirement:
3768    * - sizeof(class UnicodeString)
3769    * - offsetof(UnicodeString, fUnion)
3770    * - sizeof(fUnion)
3771    * - sizeof(fStackFields)
3772    *
3773    * We optimize for the longest possible internal buffer for short strings.
3774    * fUnion.fStackFields begins with 2 bytes for storage flags
3775    * and the length of relatively short strings,
3776    * followed by the buffer for short string contents.
3777    * There is no padding inside fStackFields.
3778    *
3779    * Heap-allocated and aliased strings use fUnion.fFields.
3780    * Both fStackFields and fFields must begin with the same fields for flags and short length,
3781    * that is, those must have the same memory offsets inside the object,
3782    * because the flags must be inspected in order to decide which half of fUnion is being used.
3783    * We assume that the compiler does not reorder the fields.
3784    *
3785    * (Padding at the end of fFields is ok:
3786    * As long as it is no larger than fStackFields, it is not wasted space.)
3787    *
3788    * For some of the history of the UnicodeString class fields layout, see
3789    * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3790    * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3791    * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3792    */
3793   // (implicit) *vtable;
3794   union StackBufferOrFields {
3795     // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3796     // Each struct of the union must begin with fLengthAndFlags, so that the flags can be read before knowing which struct is active.
3797     struct {
3798       int16_t fLengthAndFlags;          // storage flags in the low kLengthShift bits, short length above them: see constants above
3799       char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
3800     } fStackFields;
3801     struct {
3802       int16_t fLengthAndFlags;          // storage flags in the low kLengthShift bits, short length above them: see constants above
3803       int32_t fLength;    // number of characters in fArray when fLengthAndFlags is negative (long length, see kLengthIsLarge); else undefined
3804       int32_t fCapacity;  // capacity of fArray (in char16_ts)
3805       // array pointer last to minimize padding for machines with P128 data model
3806       // or pointer sizes that are not a power of 2
3807       char16_t   *fArray;    // the Unicode data
3808     } fFields;
3809   } fUnion;
3810 };
3811 
3812 /**
3813  * Create a new UnicodeString with the concatenation of two others.
3814  *
3815  * @param s1 The first string to be copied to the new one.
3816  * @param s2 The second string to be copied to the new one, after s1.
3817  * @return UnicodeString(s1).append(s2)
3818  * @stable ICU 2.8
3819  */
3820 U_COMMON_API UnicodeString U_EXPORT2
3821 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3822 
3823 //========================================
3824 // Inline members
3825 //========================================
3826 
3827 //========================================
3828 // Privates
3829 //========================================
3830 
3831 inline void
pinIndex(int32_t & start)3832 UnicodeString::pinIndex(int32_t& start) const
3833 {
3834   // pin index
3835   if(start < 0) {
3836     start = 0;
3837   } else if(start > length()) {
3838     start = length();
3839   }
3840 }
3841 
3842 inline void
pinIndices(int32_t & start,int32_t & _length)3843 UnicodeString::pinIndices(int32_t& start,
3844                           int32_t& _length) const
3845 {
3846   // pin indices
3847   int32_t len = length();
3848   if(start < 0) {
3849     start = 0;
3850   } else if(start > len) {
3851     start = len;
3852   }
3853   if(_length < 0) {
3854     _length = 0;
3855   } else if(_length > (len - start)) {
3856     _length = (len - start);
3857   }
3858 }
3859 
3860 inline char16_t*
getArrayStart()3861 UnicodeString::getArrayStart() {
3862   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3863     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3864 }
3865 
3866 inline const char16_t*
getArrayStart()3867 UnicodeString::getArrayStart() const {
3868   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3869     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3870 }
3871 
3872 //========================================
3873 // Default constructor
3874 //========================================
3875 
3876 inline
UnicodeString()3877 UnicodeString::UnicodeString() {
3878   fUnion.fStackFields.fLengthAndFlags=kShortString;
3879 }
3880 
UnicodeString(const std::nullptr_t)3881 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
3882   fUnion.fStackFields.fLengthAndFlags=kShortString;
3883 }
3884 
UnicodeString(const std::nullptr_t,int32_t)3885 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
3886   fUnion.fStackFields.fLengthAndFlags=kShortString;
3887 }
3888 
UnicodeString(std::nullptr_t,int32_t,int32_t)3889 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
3890   fUnion.fStackFields.fLengthAndFlags=kShortString;
3891 }
3892 
3893 //========================================
3894 // Read-only implementation methods
3895 //========================================
3896 inline UBool
hasShortLength()3897 UnicodeString::hasShortLength() const {
3898   return fUnion.fFields.fLengthAndFlags>=0;
3899 }
3900 
3901 inline int32_t
getShortLength()3902 UnicodeString::getShortLength() const {
3903   // fLengthAndFlags must be non-negative -> short length >= 0
3904   // and arithmetic or logical shift does not matter.
3905   return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3906 }
3907 
3908 inline int32_t
length()3909 UnicodeString::length() const {
3910   return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3911 }
3912 
3913 inline int32_t
getCapacity()3914 UnicodeString::getCapacity() const {
3915   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3916     US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3917 }
3918 
3919 inline int32_t
hashCode()3920 UnicodeString::hashCode() const
3921 { return doHashCode(); }
3922 
3923 inline UBool
isBogus()3924 UnicodeString::isBogus() const
3925 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3926 
3927 inline UBool
isWritable()3928 UnicodeString::isWritable() const
3929 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3930 
3931 inline UBool
isBufferWritable()3932 UnicodeString::isBufferWritable() const
3933 {
3934   return (UBool)(
3935       !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3936       (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3937 }
3938 
3939 inline const char16_t *
getBuffer()3940 UnicodeString::getBuffer() const {
3941   if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3942     return nullptr;
3943   } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3944     return fUnion.fStackFields.fBuffer;
3945   } else {
3946     return fUnion.fFields.fArray;
3947   }
3948 }
3949 
3950 //========================================
3951 // Read-only alias methods
3952 //========================================
3953 inline int8_t
doCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3954 UnicodeString::doCompare(int32_t start,
3955               int32_t thisLength,
3956               const UnicodeString& srcText,
3957               int32_t srcStart,
3958               int32_t srcLength) const
3959 {
3960   if(srcText.isBogus()) {
3961     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3962   } else {
3963     srcText.pinIndices(srcStart, srcLength);
3964     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3965   }
3966 }
3967 
3968 inline UBool
3969 UnicodeString::operator== (const UnicodeString& text) const
3970 {
3971   if(isBogus()) {
3972     return text.isBogus();
3973   } else {
3974     int32_t len = length(), textLength = text.length();
3975     return !text.isBogus() && len == textLength && doEquals(text, len);
3976   }
3977 }
3978 
3979 inline UBool
3980 UnicodeString::operator!= (const UnicodeString& text) const
3981 { return (! operator==(text)); }
3982 
3983 inline UBool
3984 UnicodeString::operator> (const UnicodeString& text) const
3985 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3986 
3987 inline UBool
3988 UnicodeString::operator< (const UnicodeString& text) const
3989 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3990 
3991 inline UBool
3992 UnicodeString::operator>= (const UnicodeString& text) const
3993 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3994 
3995 inline UBool
3996 UnicodeString::operator<= (const UnicodeString& text) const
3997 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3998 
3999 inline int8_t
compare(const UnicodeString & text)4000 UnicodeString::compare(const UnicodeString& text) const
4001 { return doCompare(0, length(), text, 0, text.length()); }
4002 
4003 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText)4004 UnicodeString::compare(int32_t start,
4005                int32_t _length,
4006                const UnicodeString& srcText) const
4007 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4008 
4009 inline int8_t
compare(ConstChar16Ptr srcChars,int32_t srcLength)4010 UnicodeString::compare(ConstChar16Ptr srcChars,
4011                int32_t srcLength) const
4012 { return doCompare(0, length(), srcChars, 0, srcLength); }
4013 
4014 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4015 UnicodeString::compare(int32_t start,
4016                int32_t _length,
4017                const UnicodeString& srcText,
4018                int32_t srcStart,
4019                int32_t srcLength) const
4020 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4021 
4022 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars)4023 UnicodeString::compare(int32_t start,
4024                int32_t _length,
4025                const char16_t *srcChars) const
4026 { return doCompare(start, _length, srcChars, 0, _length); }
4027 
4028 inline int8_t
compare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4029 UnicodeString::compare(int32_t start,
4030                int32_t _length,
4031                const char16_t *srcChars,
4032                int32_t srcStart,
4033                int32_t srcLength) const
4034 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4035 
4036 inline int8_t
compareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4037 UnicodeString::compareBetween(int32_t start,
4038                   int32_t limit,
4039                   const UnicodeString& srcText,
4040                   int32_t srcStart,
4041                   int32_t srcLimit) const
4042 { return doCompare(start, limit - start,
4043            srcText, srcStart, srcLimit - srcStart); }
4044 
4045 inline int8_t
doCompareCodePointOrder(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4046 UnicodeString::doCompareCodePointOrder(int32_t start,
4047                                        int32_t thisLength,
4048                                        const UnicodeString& srcText,
4049                                        int32_t srcStart,
4050                                        int32_t srcLength) const
4051 {
4052   if(srcText.isBogus()) {
4053     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4054   } else {
4055     srcText.pinIndices(srcStart, srcLength);
4056     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4057   }
4058 }
4059 
4060 inline int8_t
compareCodePointOrder(const UnicodeString & text)4061 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4062 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4063 
4064 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText)4065 UnicodeString::compareCodePointOrder(int32_t start,
4066                                      int32_t _length,
4067                                      const UnicodeString& srcText) const
4068 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4069 
4070 inline int8_t
compareCodePointOrder(ConstChar16Ptr srcChars,int32_t srcLength)4071 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4072                                      int32_t srcLength) const
4073 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4074 
4075 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4076 UnicodeString::compareCodePointOrder(int32_t start,
4077                                      int32_t _length,
4078                                      const UnicodeString& srcText,
4079                                      int32_t srcStart,
4080                                      int32_t srcLength) const
4081 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4082 
4083 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars)4084 UnicodeString::compareCodePointOrder(int32_t start,
4085                                      int32_t _length,
4086                                      const char16_t *srcChars) const
4087 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4088 
4089 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4090 UnicodeString::compareCodePointOrder(int32_t start,
4091                                      int32_t _length,
4092                                      const char16_t *srcChars,
4093                                      int32_t srcStart,
4094                                      int32_t srcLength) const
4095 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4096 
4097 inline int8_t
compareCodePointOrderBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4098 UnicodeString::compareCodePointOrderBetween(int32_t start,
4099                                             int32_t limit,
4100                                             const UnicodeString& srcText,
4101                                             int32_t srcStart,
4102                                             int32_t srcLimit) const
4103 { return doCompareCodePointOrder(start, limit - start,
4104            srcText, srcStart, srcLimit - srcStart); }
4105 
4106 inline int8_t
doCaseCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4107 UnicodeString::doCaseCompare(int32_t start,
4108                              int32_t thisLength,
4109                              const UnicodeString &srcText,
4110                              int32_t srcStart,
4111                              int32_t srcLength,
4112                              uint32_t options) const
4113 {
4114   if(srcText.isBogus()) {
4115     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4116   } else {
4117     srcText.pinIndices(srcStart, srcLength);
4118     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4119   }
4120 }
4121 
4122 inline int8_t
caseCompare(const UnicodeString & text,uint32_t options)4123 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4124   return doCaseCompare(0, length(), text, 0, text.length(), options);
4125 }
4126 
4127 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,uint32_t options)4128 UnicodeString::caseCompare(int32_t start,
4129                            int32_t _length,
4130                            const UnicodeString &srcText,
4131                            uint32_t options) const {
4132   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4133 }
4134 
4135 inline int8_t
caseCompare(ConstChar16Ptr srcChars,int32_t srcLength,uint32_t options)4136 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4137                            int32_t srcLength,
4138                            uint32_t options) const {
4139   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4140 }
4141 
4142 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4143 UnicodeString::caseCompare(int32_t start,
4144                            int32_t _length,
4145                            const UnicodeString &srcText,
4146                            int32_t srcStart,
4147                            int32_t srcLength,
4148                            uint32_t options) const {
4149   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4150 }
4151 
4152 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,uint32_t options)4153 UnicodeString::caseCompare(int32_t start,
4154                            int32_t _length,
4155                            const char16_t *srcChars,
4156                            uint32_t options) const {
4157   return doCaseCompare(start, _length, srcChars, 0, _length, options);
4158 }
4159 
4160 inline int8_t
caseCompare(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options)4161 UnicodeString::caseCompare(int32_t start,
4162                            int32_t _length,
4163                            const char16_t *srcChars,
4164                            int32_t srcStart,
4165                            int32_t srcLength,
4166                            uint32_t options) const {
4167   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4168 }
4169 
4170 inline int8_t
caseCompareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit,uint32_t options)4171 UnicodeString::caseCompareBetween(int32_t start,
4172                                   int32_t limit,
4173                                   const UnicodeString &srcText,
4174                                   int32_t srcStart,
4175                                   int32_t srcLimit,
4176                                   uint32_t options) const {
4177   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4178 }
4179 
4180 inline int32_t
indexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4181 UnicodeString::indexOf(const UnicodeString& srcText,
4182                int32_t srcStart,
4183                int32_t srcLength,
4184                int32_t start,
4185                int32_t _length) const
4186 {
4187   if(!srcText.isBogus()) {
4188     srcText.pinIndices(srcStart, srcLength);
4189     if(srcLength > 0) {
4190       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4191     }
4192   }
4193   return -1;
4194 }
4195 
4196 inline int32_t
indexOf(const UnicodeString & text)4197 UnicodeString::indexOf(const UnicodeString& text) const
4198 { return indexOf(text, 0, text.length(), 0, length()); }
4199 
4200 inline int32_t
indexOf(const UnicodeString & text,int32_t start)4201 UnicodeString::indexOf(const UnicodeString& text,
4202                int32_t start) const {
4203   pinIndex(start);
4204   return indexOf(text, 0, text.length(), start, length() - start);
4205 }
4206 
4207 inline int32_t
indexOf(const UnicodeString & text,int32_t start,int32_t _length)4208 UnicodeString::indexOf(const UnicodeString& text,
4209                int32_t start,
4210                int32_t _length) const
4211 { return indexOf(text, 0, text.length(), start, _length); }
4212 
4213 inline int32_t
indexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4214 UnicodeString::indexOf(const char16_t *srcChars,
4215                int32_t srcLength,
4216                int32_t start) const {
4217   pinIndex(start);
4218   return indexOf(srcChars, 0, srcLength, start, length() - start);
4219 }
4220 
4221 inline int32_t
indexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4222 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4223                int32_t srcLength,
4224                int32_t start,
4225                int32_t _length) const
4226 { return indexOf(srcChars, 0, srcLength, start, _length); }
4227 
4228 inline int32_t
indexOf(char16_t c,int32_t start,int32_t _length)4229 UnicodeString::indexOf(char16_t c,
4230                int32_t start,
4231                int32_t _length) const
4232 { return doIndexOf(c, start, _length); }
4233 
4234 inline int32_t
indexOf(UChar32 c,int32_t start,int32_t _length)4235 UnicodeString::indexOf(UChar32 c,
4236                int32_t start,
4237                int32_t _length) const
4238 { return doIndexOf(c, start, _length); }
4239 
4240 inline int32_t
indexOf(char16_t c)4241 UnicodeString::indexOf(char16_t c) const
4242 { return doIndexOf(c, 0, length()); }
4243 
4244 inline int32_t
indexOf(UChar32 c)4245 UnicodeString::indexOf(UChar32 c) const
4246 { return indexOf(c, 0, length()); }
4247 
4248 inline int32_t
indexOf(char16_t c,int32_t start)4249 UnicodeString::indexOf(char16_t c,
4250                int32_t start) const {
4251   pinIndex(start);
4252   return doIndexOf(c, start, length() - start);
4253 }
4254 
4255 inline int32_t
indexOf(UChar32 c,int32_t start)4256 UnicodeString::indexOf(UChar32 c,
4257                int32_t start) const {
4258   pinIndex(start);
4259   return indexOf(c, start, length() - start);
4260 }
4261 
4262 inline int32_t
lastIndexOf(ConstChar16Ptr srcChars,int32_t srcLength,int32_t start,int32_t _length)4263 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4264                int32_t srcLength,
4265                int32_t start,
4266                int32_t _length) const
4267 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4268 
4269 inline int32_t
lastIndexOf(const char16_t * srcChars,int32_t srcLength,int32_t start)4270 UnicodeString::lastIndexOf(const char16_t *srcChars,
4271                int32_t srcLength,
4272                int32_t start) const {
4273   pinIndex(start);
4274   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4275 }
4276 
4277 inline int32_t
lastIndexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4278 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4279                int32_t srcStart,
4280                int32_t srcLength,
4281                int32_t start,
4282                int32_t _length) const
4283 {
4284   if(!srcText.isBogus()) {
4285     srcText.pinIndices(srcStart, srcLength);
4286     if(srcLength > 0) {
4287       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4288     }
4289   }
4290   return -1;
4291 }
4292 
4293 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start,int32_t _length)4294 UnicodeString::lastIndexOf(const UnicodeString& text,
4295                int32_t start,
4296                int32_t _length) const
4297 { return lastIndexOf(text, 0, text.length(), start, _length); }
4298 
4299 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start)4300 UnicodeString::lastIndexOf(const UnicodeString& text,
4301                int32_t start) const {
4302   pinIndex(start);
4303   return lastIndexOf(text, 0, text.length(), start, length() - start);
4304 }
4305 
4306 inline int32_t
lastIndexOf(const UnicodeString & text)4307 UnicodeString::lastIndexOf(const UnicodeString& text) const
4308 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4309 
4310 inline int32_t
lastIndexOf(char16_t c,int32_t start,int32_t _length)4311 UnicodeString::lastIndexOf(char16_t c,
4312                int32_t start,
4313                int32_t _length) const
4314 { return doLastIndexOf(c, start, _length); }
4315 
4316 inline int32_t
lastIndexOf(UChar32 c,int32_t start,int32_t _length)4317 UnicodeString::lastIndexOf(UChar32 c,
4318                int32_t start,
4319                int32_t _length) const {
4320   return doLastIndexOf(c, start, _length);
4321 }
4322 
4323 inline int32_t
lastIndexOf(char16_t c)4324 UnicodeString::lastIndexOf(char16_t c) const
4325 { return doLastIndexOf(c, 0, length()); }
4326 
4327 inline int32_t
lastIndexOf(UChar32 c)4328 UnicodeString::lastIndexOf(UChar32 c) const {
4329   return lastIndexOf(c, 0, length());
4330 }
4331 
4332 inline int32_t
lastIndexOf(char16_t c,int32_t start)4333 UnicodeString::lastIndexOf(char16_t c,
4334                int32_t start) const {
4335   pinIndex(start);
4336   return doLastIndexOf(c, start, length() - start);
4337 }
4338 
4339 inline int32_t
lastIndexOf(UChar32 c,int32_t start)4340 UnicodeString::lastIndexOf(UChar32 c,
4341                int32_t start) const {
4342   pinIndex(start);
4343   return lastIndexOf(c, start, length() - start);
4344 }
4345 
4346 inline UBool
startsWith(const UnicodeString & text)4347 UnicodeString::startsWith(const UnicodeString& text) const
4348 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4349 
4350 inline UBool
startsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4351 UnicodeString::startsWith(const UnicodeString& srcText,
4352               int32_t srcStart,
4353               int32_t srcLength) const
4354 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4355 
4356 inline UBool
startsWith(ConstChar16Ptr srcChars,int32_t srcLength)4357 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4358   if(srcLength < 0) {
4359     srcLength = u_strlen(toUCharPtr(srcChars));
4360   }
4361   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4362 }
4363 
4364 inline UBool
startsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4365 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4366   if(srcLength < 0) {
4367     srcLength = u_strlen(toUCharPtr(srcChars));
4368   }
4369   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4370 }
4371 
4372 inline UBool
endsWith(const UnicodeString & text)4373 UnicodeString::endsWith(const UnicodeString& text) const
4374 { return doCompare(length() - text.length(), text.length(),
4375            text, 0, text.length()) == 0; }
4376 
4377 inline UBool
endsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4378 UnicodeString::endsWith(const UnicodeString& srcText,
4379             int32_t srcStart,
4380             int32_t srcLength) const {
4381   srcText.pinIndices(srcStart, srcLength);
4382   return doCompare(length() - srcLength, srcLength,
4383                    srcText, srcStart, srcLength) == 0;
4384 }
4385 
4386 inline UBool
endsWith(ConstChar16Ptr srcChars,int32_t srcLength)4387 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4388             int32_t srcLength) const {
4389   if(srcLength < 0) {
4390     srcLength = u_strlen(toUCharPtr(srcChars));
4391   }
4392   return doCompare(length() - srcLength, srcLength,
4393                    srcChars, 0, srcLength) == 0;
4394 }
4395 
4396 inline UBool
endsWith(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4397 UnicodeString::endsWith(const char16_t *srcChars,
4398             int32_t srcStart,
4399             int32_t srcLength) const {
4400   if(srcLength < 0) {
4401     srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4402   }
4403   return doCompare(length() - srcLength, srcLength,
4404                    srcChars, srcStart, srcLength) == 0;
4405 }
4406 
4407 //========================================
4408 // replace
4409 //========================================
4410 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText)4411 UnicodeString::replace(int32_t start,
4412                int32_t _length,
4413                const UnicodeString& srcText)
4414 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4415 
4416 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4417 UnicodeString::replace(int32_t start,
4418                int32_t _length,
4419                const UnicodeString& srcText,
4420                int32_t srcStart,
4421                int32_t srcLength)
4422 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4423 
4424 inline UnicodeString&
replace(int32_t start,int32_t _length,ConstChar16Ptr srcChars,int32_t srcLength)4425 UnicodeString::replace(int32_t start,
4426                int32_t _length,
4427                ConstChar16Ptr srcChars,
4428                int32_t srcLength)
4429 { return doReplace(start, _length, srcChars, 0, srcLength); }
4430 
4431 inline UnicodeString&
replace(int32_t start,int32_t _length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4432 UnicodeString::replace(int32_t start,
4433                int32_t _length,
4434                const char16_t *srcChars,
4435                int32_t srcStart,
4436                int32_t srcLength)
4437 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4438 
4439 inline UnicodeString&
replace(int32_t start,int32_t _length,char16_t srcChar)4440 UnicodeString::replace(int32_t start,
4441                int32_t _length,
4442                char16_t srcChar)
4443 { return doReplace(start, _length, &srcChar, 0, 1); }
4444 
4445 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText)4446 UnicodeString::replaceBetween(int32_t start,
4447                   int32_t limit,
4448                   const UnicodeString& srcText)
4449 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4450 
4451 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4452 UnicodeString::replaceBetween(int32_t start,
4453                   int32_t limit,
4454                   const UnicodeString& srcText,
4455                   int32_t srcStart,
4456                   int32_t srcLimit)
4457 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4458 
4459 inline UnicodeString&
findAndReplace(const UnicodeString & oldText,const UnicodeString & newText)4460 UnicodeString::findAndReplace(const UnicodeString& oldText,
4461                   const UnicodeString& newText)
4462 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4463             newText, 0, newText.length()); }
4464 
4465 inline UnicodeString&
findAndReplace(int32_t start,int32_t _length,const UnicodeString & oldText,const UnicodeString & newText)4466 UnicodeString::findAndReplace(int32_t start,
4467                   int32_t _length,
4468                   const UnicodeString& oldText,
4469                   const UnicodeString& newText)
4470 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4471             newText, 0, newText.length()); }
4472 
4473 // ============================
4474 // extract
4475 // ============================
4476 inline void
doExtract(int32_t start,int32_t _length,UnicodeString & target)4477 UnicodeString::doExtract(int32_t start,
4478              int32_t _length,
4479              UnicodeString& target) const
4480 { target.replace(0, target.length(), *this, start, _length); }
4481 
4482 inline void
extract(int32_t start,int32_t _length,Char16Ptr target,int32_t targetStart)4483 UnicodeString::extract(int32_t start,
4484                int32_t _length,
4485                Char16Ptr target,
4486                int32_t targetStart) const
4487 { doExtract(start, _length, target, targetStart); }
4488 
4489 inline void
extract(int32_t start,int32_t _length,UnicodeString & target)4490 UnicodeString::extract(int32_t start,
4491                int32_t _length,
4492                UnicodeString& target) const
4493 { doExtract(start, _length, target); }
4494 
4495 #if !UCONFIG_NO_CONVERSION
4496 
4497 inline int32_t
extract(int32_t start,int32_t _length,char * dst,const char * codepage)4498 UnicodeString::extract(int32_t start,
4499                int32_t _length,
4500                char *dst,
4501                const char *codepage) const
4502 
4503 {
4504   // This dstSize value will be checked explicitly
4505   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4506 }
4507 
4508 #endif
4509 
4510 inline void
extractBetween(int32_t start,int32_t limit,char16_t * dst,int32_t dstStart)4511 UnicodeString::extractBetween(int32_t start,
4512                   int32_t limit,
4513                   char16_t *dst,
4514                   int32_t dstStart) const {
4515   pinIndex(start);
4516   pinIndex(limit);
4517   doExtract(start, limit - start, dst, dstStart);
4518 }
4519 
4520 inline UnicodeString
tempSubStringBetween(int32_t start,int32_t limit)4521 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4522     return tempSubString(start, limit - start);
4523 }
4524 
4525 inline char16_t
doCharAt(int32_t offset)4526 UnicodeString::doCharAt(int32_t offset) const
4527 {
4528   if((uint32_t)offset < (uint32_t)length()) {
4529     return getArrayStart()[offset];
4530   } else {
4531     return kInvalidUChar;
4532   }
4533 }
4534 
4535 inline char16_t
charAt(int32_t offset)4536 UnicodeString::charAt(int32_t offset) const
4537 { return doCharAt(offset); }
4538 
4539 inline char16_t
4540 UnicodeString::operator[] (int32_t offset) const
4541 { return doCharAt(offset); }
4542 
4543 inline UBool
isEmpty()4544 UnicodeString::isEmpty() const {
4545   // Arithmetic or logical right shift does not matter: only testing for 0.
4546   return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4547 }
4548 
4549 //========================================
4550 // Write implementation methods
4551 //========================================
4552 inline void
setZeroLength()4553 UnicodeString::setZeroLength() {
4554   fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4555 }
4556 
4557 inline void
setShortLength(int32_t len)4558 UnicodeString::setShortLength(int32_t len) {
4559   // requires 0 <= len <= kMaxShortLength
4560   fUnion.fFields.fLengthAndFlags =
4561     (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4562 }
4563 
4564 inline void
setLength(int32_t len)4565 UnicodeString::setLength(int32_t len) {
4566   if(len <= kMaxShortLength) {
4567     setShortLength(len);
4568   } else {
4569     fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4570     fUnion.fFields.fLength = len;
4571   }
4572 }
4573 
4574 inline void
setToEmpty()4575 UnicodeString::setToEmpty() {
4576   fUnion.fFields.fLengthAndFlags = kShortString;
4577 }
4578 
4579 inline void
setArray(char16_t * array,int32_t len,int32_t capacity)4580 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4581   setLength(len);
4582   fUnion.fFields.fArray = array;
4583   fUnion.fFields.fCapacity = capacity;
4584 }
4585 
4586 inline UnicodeString&
4587 UnicodeString::operator= (char16_t ch)
4588 { return doReplace(0, length(), &ch, 0, 1); }
4589 
4590 inline UnicodeString&
4591 UnicodeString::operator= (UChar32 ch)
4592 { return replace(0, length(), ch); }
4593 
4594 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4595 UnicodeString::setTo(const UnicodeString& srcText,
4596              int32_t srcStart,
4597              int32_t srcLength)
4598 {
4599   unBogus();
4600   return doReplace(0, length(), srcText, srcStart, srcLength);
4601 }
4602 
4603 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart)4604 UnicodeString::setTo(const UnicodeString& srcText,
4605              int32_t srcStart)
4606 {
4607   unBogus();
4608   srcText.pinIndex(srcStart);
4609   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4610 }
4611 
4612 inline UnicodeString&
setTo(const UnicodeString & srcText)4613 UnicodeString::setTo(const UnicodeString& srcText)
4614 {
4615   return copyFrom(srcText);
4616 }
4617 
4618 inline UnicodeString&
setTo(const char16_t * srcChars,int32_t srcLength)4619 UnicodeString::setTo(const char16_t *srcChars,
4620              int32_t srcLength)
4621 {
4622   unBogus();
4623   return doReplace(0, length(), srcChars, 0, srcLength);
4624 }
4625 
4626 inline UnicodeString&
setTo(char16_t srcChar)4627 UnicodeString::setTo(char16_t srcChar)
4628 {
4629   unBogus();
4630   return doReplace(0, length(), &srcChar, 0, 1);
4631 }
4632 
4633 inline UnicodeString&
setTo(UChar32 srcChar)4634 UnicodeString::setTo(UChar32 srcChar)
4635 {
4636   unBogus();
4637   return replace(0, length(), srcChar);
4638 }
4639 
4640 inline UnicodeString&
append(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4641 UnicodeString::append(const UnicodeString& srcText,
4642               int32_t srcStart,
4643               int32_t srcLength)
4644 { return doAppend(srcText, srcStart, srcLength); }
4645 
4646 inline UnicodeString&
append(const UnicodeString & srcText)4647 UnicodeString::append(const UnicodeString& srcText)
4648 { return doAppend(srcText, 0, srcText.length()); }
4649 
4650 inline UnicodeString&
append(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4651 UnicodeString::append(const char16_t *srcChars,
4652               int32_t srcStart,
4653               int32_t srcLength)
4654 { return doAppend(srcChars, srcStart, srcLength); }
4655 
4656 inline UnicodeString&
append(ConstChar16Ptr srcChars,int32_t srcLength)4657 UnicodeString::append(ConstChar16Ptr srcChars,
4658               int32_t srcLength)
4659 { return doAppend(srcChars, 0, srcLength); }
4660 
4661 inline UnicodeString&
append(char16_t srcChar)4662 UnicodeString::append(char16_t srcChar)
4663 { return doAppend(&srcChar, 0, 1); }
4664 
4665 inline UnicodeString&
4666 UnicodeString::operator+= (char16_t ch)
4667 { return doAppend(&ch, 0, 1); }
4668 
4669 inline UnicodeString&
4670 UnicodeString::operator+= (UChar32 ch) {
4671   return append(ch);
4672 }
4673 
4674 inline UnicodeString&
4675 UnicodeString::operator+= (const UnicodeString& srcText)
4676 { return doAppend(srcText, 0, srcText.length()); }
4677 
4678 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4679 UnicodeString::insert(int32_t start,
4680               const UnicodeString& srcText,
4681               int32_t srcStart,
4682               int32_t srcLength)
4683 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4684 
4685 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText)4686 UnicodeString::insert(int32_t start,
4687               const UnicodeString& srcText)
4688 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4689 
4690 inline UnicodeString&
insert(int32_t start,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)4691 UnicodeString::insert(int32_t start,
4692               const char16_t *srcChars,
4693               int32_t srcStart,
4694               int32_t srcLength)
4695 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4696 
4697 inline UnicodeString&
insert(int32_t start,ConstChar16Ptr srcChars,int32_t srcLength)4698 UnicodeString::insert(int32_t start,
4699               ConstChar16Ptr srcChars,
4700               int32_t srcLength)
4701 { return doReplace(start, 0, srcChars, 0, srcLength); }
4702 
4703 inline UnicodeString&
insert(int32_t start,char16_t srcChar)4704 UnicodeString::insert(int32_t start,
4705               char16_t srcChar)
4706 { return doReplace(start, 0, &srcChar, 0, 1); }
4707 
4708 inline UnicodeString&
insert(int32_t start,UChar32 srcChar)4709 UnicodeString::insert(int32_t start,
4710               UChar32 srcChar)
4711 { return replace(start, 0, srcChar); }
4712 
4713 
4714 inline UnicodeString&
remove()4715 UnicodeString::remove()
4716 {
4717   // remove() of a bogus string makes the string empty and non-bogus
4718   if(isBogus()) {
4719     setToEmpty();
4720   } else {
4721     setZeroLength();
4722   }
4723   return *this;
4724 }
4725 
4726 inline UnicodeString&
remove(int32_t start,int32_t _length)4727 UnicodeString::remove(int32_t start,
4728              int32_t _length)
4729 {
4730     if(start <= 0 && _length == INT32_MAX) {
4731         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4732         return remove();
4733     }
4734     return doReplace(start, _length, NULL, 0, 0);
4735 }
4736 
4737 inline UnicodeString&
removeBetween(int32_t start,int32_t limit)4738 UnicodeString::removeBetween(int32_t start,
4739                 int32_t limit)
4740 { return doReplace(start, limit - start, NULL, 0, 0); }
4741 
4742 inline UnicodeString &
retainBetween(int32_t start,int32_t limit)4743 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4744   truncate(limit);
4745   return doReplace(0, start, NULL, 0, 0);
4746 }
4747 
4748 inline UBool
truncate(int32_t targetLength)4749 UnicodeString::truncate(int32_t targetLength)
4750 {
4751   if(isBogus() && targetLength == 0) {
4752     // truncate(0) of a bogus string makes the string empty and non-bogus
4753     unBogus();
4754     return FALSE;
4755   } else if((uint32_t)targetLength < (uint32_t)length()) {
4756     setLength(targetLength);
4757     return TRUE;
4758   } else {
4759     return FALSE;
4760   }
4761 }
4762 
4763 inline UnicodeString&
reverse()4764 UnicodeString::reverse()
4765 { return doReverse(0, length()); }
4766 
4767 inline UnicodeString&
reverse(int32_t start,int32_t _length)4768 UnicodeString::reverse(int32_t start,
4769                int32_t _length)
4770 { return doReverse(start, _length); }
4771 
4772 U_NAMESPACE_END
4773 
4774 #endif
4775