1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20
21 #ifndef UNISTR_H
22 #define UNISTR_H
23
24 /**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34
35 struct UConverter; // unicode/ucnv.h
36 class StringThreadTest;
37
38 #ifndef U_COMPARE_CODE_POINT_ORDER
39 /* Guarded by #ifndef so that an earlier definition of the macro is kept; see also ustring.h and unorm.h */
40 /**
41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
42 * Compare strings in code point order instead of code unit order. The UnicodeString::caseCompare() methods list it among their valid option bits.
43 * @stable ICU 2.2
44 */
45 #define U_COMPARE_CODE_POINT_ORDER 0x8000
46 #endif
47
48 #ifndef USTRING_H /* declare u_strlen() here only while USTRING_H is undefined (presumably the include guard of ustring.h -- confirm) */
49 /**
50 * \ingroup ustring_ustrlen
51 */
52 U_STABLE int32_t U_EXPORT2
53 u_strlen(const UChar *s);
54 #endif /* USTRING_H */
55
56 U_NAMESPACE_BEGIN
57
58 class BreakIterator; // unicode/brkiter.h
59 class Locale; // unicode/locid.h
60 class StringCharacterIterator;
61 class UnicodeStringAppendable; // unicode/appendable.h
62
63 /* The <iostream> include has been moved to unicode/ustream.h */
64
65 /**
66 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
67 * which constructs a Unicode string from an invariant-character char * string.
68 * About invariant characters see utypes.h.
69 * This constructor has no runtime dependency on conversion code and is
70 * therefore recommended over ones taking a charset name string
71 * (where the empty string "" indicates invariant-character conversion).
72 * The macro expands to UnicodeString::kInvariant prefixed with U_NAMESPACE_QUALIFIER.
73 * @stable ICU 3.2
74 */
75 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
76
77 /**
78 * Unicode String literals in C++.
79 * Dependent on the platform properties, different UnicodeString
80 * constructors should be used to create a UnicodeString object from
81 * a string literal.
82 * The macros are defined for maximum performance: three of the four variants below cast the literal to const UChar * and pass it to a UnicodeString(TRUE, text, length) constructor; only the final fallback uses the (cs, _length, US_INV) invariant-character constructor.
83 * They work only for strings that contain "invariant characters", i.e.,
84 * only Latin letters, digits, and some punctuation.
85 * See utypes.h for details.
86 *
87 * The string parameter (cs) must be a C string literal; one variant below pastes an L prefix directly onto it (L ## cs).
88 * The length of the string (_length), not including the terminating
89 * <code>NUL</code>, must be specified as a constant.
90 * The U_STRING_DECL macro should be invoked exactly once for one
91 * such string variable before it is used.
92 * @stable ICU 2.0
93 */
94 #if defined(U_DECLARE_UTF16)
95 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
96 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
97 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
98 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
99 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
100 #else
101 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
102 #endif
103
104 /**
105 * Unicode String literals in C++.
106 * Dependent on the platform properties, different UnicodeString
107 * constructors should be used to create a UnicodeString object from
108 * a string literal.
109 * The macros are defined for improved performance.
110 * They work only for strings that contain "invariant characters", i.e.,
111 * only Latin letters, digits, and some punctuation.
112 * See utypes.h for details.
113 * This form forwards to UNICODE_STRING(cs, -1), so the caller supplies no length constant (presumably -1 means "NUL-terminated, determine the length" -- confirm against the constructors).
114 * The string parameter must be a C string literal.
115 * @stable ICU 2.0
116 */
117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
118
119 /**
120 * UnicodeString is a string class that stores Unicode characters directly and provides
121 * functionality similar to that of the Java String and StringBuffer classes.
122 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
123 *
124 * The UnicodeString class is not suitable for subclassing.
125 *
126 * <p>For an overview of Unicode strings in C and C++ see the
127 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
128 *
129 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
130 * A Unicode character may be stored with either one code unit
131 * (the most common case) or with a matched pair of special code units
132 * ("surrogates"). The data type for code units is UChar.
133 * For single-character handling, a Unicode character code <em>point</em> is a value
134 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
135 *
136 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
137 * This is the same as with multi-byte char* strings in traditional string handling.
138 * Operations on partial strings typically do not test for code point boundaries.
139 * If necessary, the user needs to take care of such boundaries by testing for the code unit
140 * values or by using functions like
141 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
142 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
143 *
144 * UnicodeString methods are more lenient with regard to input parameter values
145 * than other ICU APIs. In particular:
146 * - If indexes are out of bounds for a UnicodeString object
147 * (<0 or >length()) then they are "pinned" to the nearest boundary.
148 * - If primitive string pointer values (e.g., const UChar * or char *)
149 * for input strings are NULL, then those input string parameters are treated
150 * as if they pointed to an empty string.
151 * However, this is <em>not</em> the case for char * parameters for charset names
152 * or other IDs.
153 * - Most UnicodeString methods do not take a UErrorCode parameter because
154 * there are usually very few opportunities for failure other than a shortage
155 * of memory, error codes in low-level C++ string methods would be inconvenient,
156 * and the error code as the last parameter (ICU convention) would prevent
157 * the use of default parameter values.
158 * Instead, such methods set the UnicodeString into a "bogus" state
159 * (see isBogus()) if an error occurs.
160 *
161 * In string comparisons, two UnicodeString objects that are both "bogus"
162 * compare equal (to be transitive and prevent endless loops in sorting),
163 * and a "bogus" string compares less than any non-"bogus" one.
164 *
165 * Const UnicodeString methods are thread-safe. Multiple threads can use
166 * const methods on the same UnicodeString object simultaneously,
167 * but non-const methods must not be called concurrently (in multiple threads)
168 * with any other (const or non-const) methods.
169 *
170 * Similarly, const UnicodeString & parameters are thread-safe.
171 * One object may be passed in as such a parameter concurrently in multiple threads.
172 * This includes the const UnicodeString & parameters for
173 * copy construction, assignment, and cloning.
174 *
175 * <p>UnicodeString uses several storage methods.
176 * String contents can be stored inside the UnicodeString object itself,
177 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
178 * Most of this is done transparently, but careful aliasing in particular provides
179 * significant performance improvements.
180 * Also, the internal buffer is accessible via special functions.
181 * For details see the
182 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
183 *
184 * @see utf.h
185 * @see CharacterIterator
186 * @stable ICU 2.0
187 */
188 class U_COMMON_API UnicodeString : public Replaceable
189 {
190 public:
191
192 /**
193 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
194 * which constructs a Unicode string from an invariant-character char * string.
195 * Use the macro US_INV instead of the full qualification for this value.
196 * The enum has a single enumerator, kInvariant, which is the value passed as the EInvariant argument of that constructor.
197 * @see US_INV
198 * @stable ICU 3.2
199 */
200 enum EInvariant {
201 /** The only value of EInvariant; the US_INV macro expands to it.
202 * @see EInvariant
203 * @stable ICU 3.2
204 */
205 kInvariant
206 };
207
208 //========================================
209 // Read-only operations
210 //========================================
211
212 /* Comparison - bitwise only - for international comparison use collation */
213
214 /**
215 * Equality operator. Performs only bitwise comparison.
216 * @param text The UnicodeString to compare to this one.
217 * @return TRUE if <TT>text</TT> contains the same characters as this one,
218 * FALSE otherwise.
219 * @stable ICU 2.0
220 */
221 inline UBool operator== (const UnicodeString& text) const;
222
223 /**
224 * Inequality operator. Performs only bitwise comparison.
225 * @param text The UnicodeString to compare to this one.
226 * @return FALSE if <TT>text</TT> contains the same characters as this one,
227 * TRUE otherwise.
228 * @stable ICU 2.0
229 */
230 inline UBool operator!= (const UnicodeString& text) const;
231
232 /**
233 * Greater than operator. Performs only bitwise comparison.
234 * @param text The UnicodeString to compare to this one.
235 * @return TRUE if the characters in this are bitwise
236 * greater than the characters in <code>text</code>, FALSE otherwise
237 * @stable ICU 2.0
238 */
239 inline UBool operator> (const UnicodeString& text) const;
240
241 /**
242 * Less than operator. Performs only bitwise comparison.
243 * @param text The UnicodeString to compare to this one.
244 * @return TRUE if the characters in this are bitwise
245 * less than the characters in <code>text</code>, FALSE otherwise
246 * @stable ICU 2.0
247 */
248 inline UBool operator< (const UnicodeString& text) const;
249
250 /**
251 * Greater than or equal operator. Performs only bitwise comparison.
252 * @param text The UnicodeString to compare to this one.
253 * @return TRUE if the characters in this are bitwise
254 * greater than or equal to the characters in <code>text</code>, FALSE otherwise
255 * @stable ICU 2.0
256 */
257 inline UBool operator>= (const UnicodeString& text) const;
258
259 /**
260 * Less than or equal operator. Performs only bitwise comparison.
261 * @param text The UnicodeString to compare to this one.
262 * @return TRUE if the characters in this are bitwise
263 * less than or equal to the characters in <code>text</code>, FALSE otherwise
264 * @stable ICU 2.0
265 */
266 inline UBool operator<= (const UnicodeString& text) const;
267
268 /**
269 * Compare the characters bitwise in this UnicodeString to
270 * the characters in <code>text</code>.
271 * @param text The UnicodeString to compare to this one.
272 * @return The result of bitwise character comparison: 0 if this
273 * contains the same characters as <code>text</code>, -1 if the characters in
274 * this are bitwise less than the characters in <code>text</code>, +1 if the
275 * characters in this are bitwise greater than the characters
276 * in <code>text</code>.
277 * @stable ICU 2.0
278 */
279 inline int8_t compare(const UnicodeString& text) const;
280
281 /**
282 * Compare the characters bitwise in the range
283 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
284 * in <TT>text</TT>
285 * @param start the offset at which the compare operation begins
286 * @param length the number of characters in this string to compare.
287 * @param text the other text to be compared against this string.
288 * @return The result of bitwise character comparison: 0 if this
289 * contains the same characters as <code>text</code>, -1 if the characters in
290 * this are bitwise less than the characters in <code>text</code>, +1 if the
291 * characters in this are bitwise greater than the characters
292 * in <code>text</code>.
293 * @stable ICU 2.0
294 */
295 inline int8_t compare(int32_t start,
296 int32_t length,
297 const UnicodeString& text) const;
298
299 /**
300 * Compare the characters bitwise in the range
301 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
302 * in <TT>srcText</TT> in the range
303 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
304 * @param start the offset at which the compare operation begins
305 * @param length the number of characters in this to compare.
306 * @param srcText the text to be compared
307 * @param srcStart the offset into <TT>srcText</TT> to start comparison
308 * @param srcLength the number of characters in <TT>srcText</TT> to compare
309 * @return The result of bitwise character comparison: 0 if this
310 * contains the same characters as <code>srcText</code>, -1 if the characters in
311 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
312 * characters in this are bitwise greater than the characters
313 * in <code>srcText</code>.
314 * @stable ICU 2.0
315 */
316 inline int8_t compare(int32_t start,
317 int32_t length,
318 const UnicodeString& srcText,
319 int32_t srcStart,
320 int32_t srcLength) const;
321
322 /**
323 * Compare the characters bitwise in this UnicodeString with the first
324 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
325 * @param srcChars The characters to compare to this UnicodeString.
326 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
327 * @return The result of bitwise character comparison: 0 if this
328 * contains the same characters as <code>srcChars</code>, -1 if the characters in
329 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
330 * characters in this are bitwise greater than the characters
331 * in <code>srcChars</code>.
332 * @stable ICU 2.0
333 */
334 inline int8_t compare(const UChar *srcChars,
335 int32_t srcLength) const;
336
337 /**
338 * Compare the characters bitwise in the range
339 * [<TT>start</TT>, <TT>start + length</TT>) with the first
340 * <TT>length</TT> characters in <TT>srcChars</TT>
341 * @param start the offset at which the compare operation begins
342 * @param length the number of characters to compare.
343 * @param srcChars the characters to be compared
344 * @return The result of bitwise character comparison: 0 if this
345 * contains the same characters as <code>srcChars</code>, -1 if the characters in
346 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
347 * characters in this are bitwise greater than the characters
348 * in <code>srcChars</code>.
349 * @stable ICU 2.0
350 */
351 inline int8_t compare(int32_t start,
352 int32_t length,
353 const UChar *srcChars) const;
354
355 /**
356 * Compare the characters bitwise in the range
357 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
358 * in <TT>srcChars</TT> in the range
359 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
360 * @param start the offset at which the compare operation begins
361 * @param length the number of characters in this to compare
362 * @param srcChars the characters to be compared
363 * @param srcStart the offset into <TT>srcChars</TT> to start comparison
364 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
365 * @return The result of bitwise character comparison: 0 if this
366 * contains the same characters as <code>srcChars</code>, -1 if the characters in
367 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
368 * characters in this are bitwise greater than the characters
369 * in <code>srcChars</code>.
370 * @stable ICU 2.0
371 */
372 inline int8_t compare(int32_t start,
373 int32_t length,
374 const UChar *srcChars,
375 int32_t srcStart,
376 int32_t srcLength) const;
377
378 /**
379 * Compare the characters bitwise in the range
380 * [<TT>start</TT>, <TT>limit</TT>) with the characters
381 * in <TT>srcText</TT> in the range
382 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
383 * @param start the offset at which the compare operation begins
384 * @param limit the offset immediately following the compare operation
385 * @param srcText the text to be compared
386 * @param srcStart the offset into <TT>srcText</TT> to start comparison
387 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
388 * @return The result of bitwise character comparison: 0 if this
389 * contains the same characters as <code>srcText</code>, -1 if the characters in
390 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
391 * characters in this are bitwise greater than the characters
392 * in <code>srcText</code>.
393 * @stable ICU 2.0
394 */
395 inline int8_t compareBetween(int32_t start,
396 int32_t limit,
397 const UnicodeString& srcText,
398 int32_t srcStart,
399 int32_t srcLimit) const;
400
401 /**
402 * Compare two Unicode strings in code point order.
403 * The result may be different from the results of compare(), operator<, etc.
404 * if supplementary characters are present:
405 *
406 * In UTF-16, supplementary characters (with code points U+10000 and above) are
407 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
408 * which means that they compare as less than some other BMP characters like U+feff.
409 * This function compares Unicode strings in code point order.
410 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
411 *
412 * @param text Another string to compare this one to.
413 * @return a negative/zero/positive integer corresponding to whether
414 * this string is less than/equal to/greater than the second one
415 * in code point order
416 * @stable ICU 2.0
417 */
418 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
419
420 /**
421 * Compare two Unicode strings in code point order.
422 * The result may be different from the results of compare(), operator<, etc.
423 * if supplementary characters are present:
424 *
425 * In UTF-16, supplementary characters (with code points U+10000 and above) are
426 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
427 * which means that they compare as less than some other BMP characters like U+feff.
428 * This function compares Unicode strings in code point order.
429 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
430 *
431 * @param start The start offset in this string at which the compare operation begins.
432 * @param length The number of code units from this string to compare.
433 * @param srcText Another string to compare this one to.
434 * @return a negative/zero/positive integer corresponding to whether
435 * this string is less than/equal to/greater than the second one
436 * in code point order
437 * @stable ICU 2.0
438 */
439 inline int8_t compareCodePointOrder(int32_t start,
440 int32_t length,
441 const UnicodeString& srcText) const;
442
443 /**
444 * Compare two Unicode strings in code point order.
445 * The result may be different from the results of compare(), operator<, etc.
446 * if supplementary characters are present:
447 *
448 * In UTF-16, supplementary characters (with code points U+10000 and above) are
449 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
450 * which means that they compare as less than some other BMP characters like U+feff.
451 * This function compares Unicode strings in code point order.
452 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
453 *
454 * @param start The start offset in this string at which the compare operation begins.
455 * @param length The number of code units from this string to compare.
456 * @param srcText Another string to compare this one to.
457 * @param srcStart The start offset in that string at which the compare operation begins.
458 * @param srcLength The number of code units from that string to compare.
459 * @return a negative/zero/positive integer corresponding to whether
460 * this string is less than/equal to/greater than the second one
461 * in code point order
462 * @stable ICU 2.0
463 */
464 inline int8_t compareCodePointOrder(int32_t start,
465 int32_t length,
466 const UnicodeString& srcText,
467 int32_t srcStart,
468 int32_t srcLength) const;
469
470 /**
471 * Compare two Unicode strings in code point order.
472 * The result may be different from the results of compare(), operator<, etc.
473 * if supplementary characters are present:
474 *
475 * In UTF-16, supplementary characters (with code points U+10000 and above) are
476 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
477 * which means that they compare as less than some other BMP characters like U+feff.
478 * This function compares Unicode strings in code point order.
479 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
480 *
481 * @param srcChars A pointer to another string to compare this one to.
482 * @param srcLength The number of code units from that string to compare.
483 * @return a negative/zero/positive integer corresponding to whether
484 * this string is less than/equal to/greater than the second one
485 * in code point order
486 * @stable ICU 2.0
487 */
488 inline int8_t compareCodePointOrder(const UChar *srcChars,
489 int32_t srcLength) const;
490
491 /**
492 * Compare two Unicode strings in code point order.
493 * The result may be different from the results of compare(), operator<, etc.
494 * if supplementary characters are present:
495 *
496 * In UTF-16, supplementary characters (with code points U+10000 and above) are
497 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
498 * which means that they compare as less than some other BMP characters like U+feff.
499 * This function compares Unicode strings in code point order.
500 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
501 *
502 * @param start The start offset in this string at which the compare operation begins.
503 * @param length The number of code units from this string to compare.
504 * @param srcChars A pointer to another string to compare this one to.
505 * @return a negative/zero/positive integer corresponding to whether
506 * this string is less than/equal to/greater than the second one
507 * in code point order
508 * @stable ICU 2.0
509 */
510 inline int8_t compareCodePointOrder(int32_t start,
511 int32_t length,
512 const UChar *srcChars) const;
513
514 /**
515 * Compare two Unicode strings in code point order.
516 * The result may be different from the results of compare(), operator<, etc.
517 * if supplementary characters are present:
518 *
519 * In UTF-16, supplementary characters (with code points U+10000 and above) are
520 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
521 * which means that they compare as less than some other BMP characters like U+feff.
522 * This function compares Unicode strings in code point order.
523 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
524 *
525 * @param start The start offset in this string at which the compare operation begins.
526 * @param length The number of code units from this string to compare.
527 * @param srcChars A pointer to another string to compare this one to.
528 * @param srcStart The start offset in that string at which the compare operation begins.
529 * @param srcLength The number of code units from that string to compare.
530 * @return a negative/zero/positive integer corresponding to whether
531 * this string is less than/equal to/greater than the second one
532 * in code point order
533 * @stable ICU 2.0
534 */
535 inline int8_t compareCodePointOrder(int32_t start,
536 int32_t length,
537 const UChar *srcChars,
538 int32_t srcStart,
539 int32_t srcLength) const;
540
541 /**
542 * Compare two Unicode strings in code point order.
543 * The result may be different from the results of compare(), operator<, etc.
544 * if supplementary characters are present:
545 *
546 * In UTF-16, supplementary characters (with code points U+10000 and above) are
547 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
548 * which means that they compare as less than some other BMP characters like U+feff.
549 * This function compares Unicode strings in code point order.
550 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
551 *
552 * @param start The start offset in this string at which the compare operation begins.
553 * @param limit The offset after the last code unit from this string to compare.
554 * @param srcText Another string to compare this one to.
555 * @param srcStart The start offset in that string at which the compare operation begins.
556 * @param srcLimit The offset after the last code unit from that string to compare.
557 * @return a negative/zero/positive integer corresponding to whether
558 * this string is less than/equal to/greater than the second one
559 * in code point order
560 * @stable ICU 2.0
561 */
562 inline int8_t compareCodePointOrderBetween(int32_t start,
563 int32_t limit,
564 const UnicodeString& srcText,
565 int32_t srcStart,
566 int32_t srcLimit) const;
567
568 /**
569 * Compare two strings case-insensitively using full case folding.
570 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
571 *
572 * @param text Another string to compare this one to.
573 * @param options A bit set of options:
574 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
575 * Comparison in code unit order with default case folding.
576 *
577 * - U_COMPARE_CODE_POINT_ORDER
578 * Set to choose code point order instead of code unit order
579 * (see u_strCompare for details).
580 *
581 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
582 *
583 * @return A negative, zero, or positive integer indicating the comparison result.
584 * @stable ICU 2.0
585 */
586 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
587
588 /**
589 * Compare two strings case-insensitively using full case folding.
590 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
591 *
592 * @param start The start offset in this string at which the compare operation begins.
593 * @param length The number of code units from this string to compare.
594 * @param srcText Another string to compare this one to.
595 * @param options A bit set of options:
596 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
597 * Comparison in code unit order with default case folding.
598 *
599 * - U_COMPARE_CODE_POINT_ORDER
600 * Set to choose code point order instead of code unit order
601 * (see u_strCompare for details).
602 *
603 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
604 *
605 * @return A negative, zero, or positive integer indicating the comparison result.
606 * @stable ICU 2.0
607 */
608 inline int8_t caseCompare(int32_t start,
609 int32_t length,
610 const UnicodeString& srcText,
611 uint32_t options) const;
612
613 /**
614 * Compare two strings case-insensitively using full case folding.
615 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
616 *
617 * @param start The start offset in this string at which the compare operation begins.
618 * @param length The number of code units from this string to compare.
619 * @param srcText Another string to compare this one to.
620 * @param srcStart The start offset in that string at which the compare operation begins.
621 * @param srcLength The number of code units from that string to compare.
622 * @param options A bit set of options:
623 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
624 * Comparison in code unit order with default case folding.
625 *
626 * - U_COMPARE_CODE_POINT_ORDER
627 * Set to choose code point order instead of code unit order
628 * (see u_strCompare for details).
629 *
630 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
631 *
632 * @return A negative, zero, or positive integer indicating the comparison result.
633 * @stable ICU 2.0
634 */
635 inline int8_t caseCompare(int32_t start,
636 int32_t length,
637 const UnicodeString& srcText,
638 int32_t srcStart,
639 int32_t srcLength,
640 uint32_t options) const;
641
642 /**
643 * Compare two strings case-insensitively using full case folding.
644 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
645 *
646 * @param srcChars A pointer to another string to compare this one to.
647 * @param srcLength The number of code units from that string to compare.
648 * @param options A bit set of options:
649 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
650 * Comparison in code unit order with default case folding.
651 *
652 * - U_COMPARE_CODE_POINT_ORDER
653 * Set to choose code point order instead of code unit order
654 * (see u_strCompare for details).
655 *
656 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
657 *
658 * @return A negative, zero, or positive integer indicating the comparison result.
659 * @stable ICU 2.0
660 */
661 inline int8_t caseCompare(const UChar *srcChars,
662 int32_t srcLength,
663 uint32_t options) const;
664
665 /**
666 * Compare two strings case-insensitively using full case folding.
667 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
668 *
669 * @param start The start offset in this string at which the compare operation begins.
670 * @param length The number of code units from this string to compare.
671 * @param srcChars A pointer to another string to compare this one to.
672 * @param options A bit set of options:
673 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
674 * Comparison in code unit order with default case folding.
675 *
676 * - U_COMPARE_CODE_POINT_ORDER
677 * Set to choose code point order instead of code unit order
678 * (see u_strCompare for details).
679 *
680 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
681 *
682 * @return A negative, zero, or positive integer indicating the comparison result.
683 * @stable ICU 2.0
684 */
685 inline int8_t caseCompare(int32_t start,
686 int32_t length,
687 const UChar *srcChars,
688 uint32_t options) const;
689
690 /**
691 * Compare two strings case-insensitively using full case folding.
692 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
693 *
694 * @param start The start offset in this string at which the compare operation begins.
695 * @param length The number of code units from this string to compare.
696 * @param srcChars A pointer to another string to compare this one to.
697 * @param srcStart The start offset in that string at which the compare operation begins.
698 * @param srcLength The number of code units from that string to compare.
699 * @param options A bit set of options:
700 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
701 * Comparison in code unit order with default case folding.
702 *
703 * - U_COMPARE_CODE_POINT_ORDER
704 * Set to choose code point order instead of code unit order
705 * (see u_strCompare for details).
706 *
707 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
708 *
709 * @return A negative, zero, or positive integer indicating the comparison result.
710 * @stable ICU 2.0
711 */
712 inline int8_t caseCompare(int32_t start,
713 int32_t length,
714 const UChar *srcChars,
715 int32_t srcStart,
716 int32_t srcLength,
717 uint32_t options) const;
718
719 /**
720 * Compare two strings case-insensitively using full case folding.
721 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
722 *
723 * @param start The start offset in this string at which the compare operation begins.
724 * @param limit The offset after the last code unit from this string to compare.
725 * @param srcText Another string to compare this one to.
726 * @param srcStart The start offset in that string at which the compare operation begins.
727 * @param srcLimit The offset after the last code unit from that string to compare.
728 * @param options A bit set of options:
729 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
730 * Comparison in code unit order with default case folding.
731 *
732 * - U_COMPARE_CODE_POINT_ORDER
733 * Set to choose code point order instead of code unit order
734 * (see u_strCompare for details).
735 *
736 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
737 *
738 * @return A negative, zero, or positive integer indicating the comparison result.
739 * @stable ICU 2.0
740 */
741 inline int8_t caseCompareBetween(int32_t start,
742 int32_t limit,
743 const UnicodeString& srcText,
744 int32_t srcStart,
745 int32_t srcLimit,
746 uint32_t options) const;
747
748 /**
749 * Determine if this starts with the characters in <TT>text</TT>
750 * @param text The text to match.
751 * @return TRUE if this starts with the characters in <TT>text</TT>,
752 * FALSE otherwise
753 * @stable ICU 2.0
754 */
755 inline UBool startsWith(const UnicodeString& text) const;
756
757 /**
758 * Determine if this starts with the characters in <TT>srcText</TT>
759 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
760 * @param srcText The text to match.
761 * @param srcStart the offset into <TT>srcText</TT> to start matching
762 * @param srcLength the number of characters in <TT>srcText</TT> to match
763 * @return TRUE if this starts with the characters in <TT>srcText</TT>,
764 * FALSE otherwise
765 * @stable ICU 2.0
766 */
767 inline UBool startsWith(const UnicodeString& srcText,
768 int32_t srcStart,
769 int32_t srcLength) const;
770
771 /**
772 * Determine if this starts with the characters in <TT>srcChars</TT>
773 * @param srcChars The characters to match.
774 * @param srcLength the number of characters in <TT>srcChars</TT>
775 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
776 * FALSE otherwise
777 * @stable ICU 2.0
778 */
779 inline UBool startsWith(const UChar *srcChars,
780 int32_t srcLength) const;
781
782 /**
783 * Determine if this starts with the characters in <TT>srcChars</TT>
784 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
785 * @param srcChars The characters to match.
786 * @param srcStart the offset into <TT>srcChars</TT> to start matching
787 * @param srcLength the number of characters in <TT>srcChars</TT> to match
788 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
789 * @stable ICU 2.0
790 */
791 inline UBool startsWith(const UChar *srcChars,
792 int32_t srcStart,
793 int32_t srcLength) const;
794
795 /**
796 * Determine if this ends with the characters in <TT>text</TT>
797 * @param text The text to match.
798 * @return TRUE if this ends with the characters in <TT>text</TT>,
799 * FALSE otherwise
800 * @stable ICU 2.0
801 */
802 inline UBool endsWith(const UnicodeString& text) const;
803
804 /**
805 * Determine if this ends with the characters in <TT>srcText</TT>
806 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
807 * @param srcText The text to match.
808 * @param srcStart the offset into <TT>srcText</TT> to start matching
809 * @param srcLength the number of characters in <TT>srcText</TT> to match
810 * @return TRUE if this ends with the characters in <TT>srcText</TT>,
811 * FALSE otherwise
812 * @stable ICU 2.0
813 */
814 inline UBool endsWith(const UnicodeString& srcText,
815 int32_t srcStart,
816 int32_t srcLength) const;
817
818 /**
819 * Determine if this ends with the characters in <TT>srcChars</TT>
820 * @param srcChars The characters to match.
821 * @param srcLength the number of characters in <TT>srcChars</TT>
822 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
823 * FALSE otherwise
824 * @stable ICU 2.0
825 */
826 inline UBool endsWith(const UChar *srcChars,
827 int32_t srcLength) const;
828
829 /**
830 * Determine if this ends with the characters in <TT>srcChars</TT>
831 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
832 * @param srcChars The characters to match.
833 * @param srcStart the offset into <TT>srcChars</TT> to start matching
834 * @param srcLength the number of characters in <TT>srcChars</TT> to match
835 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
836 * FALSE otherwise
837 * @stable ICU 2.0
838 */
839 inline UBool endsWith(const UChar *srcChars,
840 int32_t srcStart,
841 int32_t srcLength) const;
842
843
844 /* Searching - bitwise only */
845
846 /**
847 * Locate in this the first occurrence of the characters in <TT>text</TT>,
848 * using bitwise comparison.
849 * @param text The text to search for.
850 * @return The offset into this of the start of <TT>text</TT>,
851 * or -1 if not found.
852 * @stable ICU 2.0
853 */
854 inline int32_t indexOf(const UnicodeString& text) const;
855
856 /**
857 * Locate in this the first occurrence of the characters in <TT>text</TT>
858 * starting at offset <TT>start</TT>, using bitwise comparison.
859 * @param text The text to search for.
860 * @param start The offset at which searching will start.
861 * @return The offset into this of the start of <TT>text</TT>,
862 * or -1 if not found.
863 * @stable ICU 2.0
864 */
865 inline int32_t indexOf(const UnicodeString& text,
866 int32_t start) const;
867
868 /**
869 * Locate in this the first occurrence in the range
870 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
871 * in <TT>text</TT>, using bitwise comparison.
872 * @param text The text to search for.
873 * @param start The offset at which searching will start.
874 * @param length The number of characters to search
875 * @return The offset into this of the start of <TT>text</TT>,
876 * or -1 if not found.
877 * @stable ICU 2.0
878 */
879 inline int32_t indexOf(const UnicodeString& text,
880 int32_t start,
881 int32_t length) const;
882
883 /**
884 * Locate in this the first occurrence in the range
885 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
886 * in <TT>srcText</TT> in the range
887 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
888 * using bitwise comparison.
889 * @param srcText The text to search for.
890 * @param srcStart the offset into <TT>srcText</TT> at which
891 * to start matching
892 * @param srcLength the number of characters in <TT>srcText</TT> to match
893 * @param start the offset into this at which to start matching
894 * @param length the number of characters in this to search
895 * @return The offset into this of the start of the match for <TT>srcText</TT>,
896 * or -1 if not found.
897 * @stable ICU 2.0
898 */
899 inline int32_t indexOf(const UnicodeString& srcText,
900 int32_t srcStart,
901 int32_t srcLength,
902 int32_t start,
903 int32_t length) const;
904
905 /**
906 * Locate in this the first occurrence of the characters in
907 * <TT>srcChars</TT>
908 * starting at offset <TT>start</TT>, using bitwise comparison.
909 * @param srcChars The text to search for.
910 * @param srcLength the number of characters in <TT>srcChars</TT> to match
911 * @param start the offset into this at which to start matching
912 * @return The offset into this of the start of <TT>srcChars</TT>,
913 * or -1 if not found.
914 * @stable ICU 2.0
915 */
916 inline int32_t indexOf(const UChar *srcChars,
917 int32_t srcLength,
918 int32_t start) const;
919
920 /**
921 * Locate in this the first occurrence in the range
922 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
923 * in <TT>srcChars</TT>, using bitwise comparison.
924 * @param srcChars The text to search for.
925 * @param srcLength the number of characters in <TT>srcChars</TT>
926 * @param start The offset at which searching will start.
927 * @param length The number of characters to search
928 * @return The offset into this of the start of <TT>srcChars</TT>,
929 * or -1 if not found.
930 * @stable ICU 2.0
931 */
932 inline int32_t indexOf(const UChar *srcChars,
933 int32_t srcLength,
934 int32_t start,
935 int32_t length) const;
936
937 /**
938 * Locate in this the first occurrence in the range
939 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
940 * in <TT>srcChars</TT> in the range
941 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
942 * using bitwise comparison.
943 * @param srcChars The text to search for.
944 * @param srcStart the offset into <TT>srcChars</TT> at which
945 * to start matching
946 * @param srcLength the number of characters in <TT>srcChars</TT> to match
947 * @param start the offset into this at which to start matching
948 * @param length the number of characters in this to search
949 * @return The offset into this of the start of the match for <TT>srcChars</TT>,
950 * or -1 if not found.
951 * @stable ICU 2.0
952 */
953 int32_t indexOf(const UChar *srcChars,
954 int32_t srcStart,
955 int32_t srcLength,
956 int32_t start,
957 int32_t length) const;
958
959 /**
960 * Locate in this the first occurrence of the BMP code point <code>c</code>,
961 * using bitwise comparison.
962 * @param c The code unit to search for.
963 * @return The offset into this of <TT>c</TT>, or -1 if not found.
964 * @stable ICU 2.0
965 */
966 inline int32_t indexOf(UChar c) const;
967
968 /**
969 * Locate in this the first occurrence of the code point <TT>c</TT>,
970 * using bitwise comparison.
971 *
972 * @param c The code point to search for.
973 * @return The offset into this of <TT>c</TT>, or -1 if not found.
974 * @stable ICU 2.0
975 */
976 inline int32_t indexOf(UChar32 c) const;
977
978 /**
979 * Locate in this the first occurrence of the BMP code point <code>c</code>,
980 * starting at offset <TT>start</TT>, using bitwise comparison.
981 * @param c The code unit to search for.
982 * @param start The offset at which searching will start.
983 * @return The offset into this of <TT>c</TT>, or -1 if not found.
984 * @stable ICU 2.0
985 */
986 inline int32_t indexOf(UChar c,
987 int32_t start) const;
988
989 /**
990 * Locate in this the first occurrence of the code point <TT>c</TT>
991 * starting at offset <TT>start</TT>, using bitwise comparison.
992 *
993 * @param c The code point to search for.
994 * @param start The offset at which searching will start.
995 * @return The offset into this of <TT>c</TT>, or -1 if not found.
996 * @stable ICU 2.0
997 */
998 inline int32_t indexOf(UChar32 c,
999 int32_t start) const;
1000
1001 /**
1002 * Locate in this the first occurrence of the BMP code point <code>c</code>
1003 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1004 * using bitwise comparison.
1005 * @param c The code unit to search for.
1006 * @param start the offset into this at which to start matching
1007 * @param length the number of characters in this to search
1008 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1009 * @stable ICU 2.0
1010 */
1011 inline int32_t indexOf(UChar c,
1012 int32_t start,
1013 int32_t length) const;
1014
1015 /**
1016 * Locate in this the first occurrence of the code point <TT>c</TT>
1017 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1018 * using bitwise comparison.
1019 *
1020 * @param c The code point to search for.
1021 * @param start the offset into this at which to start matching
1022 * @param length the number of characters in this to search
1023 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1024 * @stable ICU 2.0
1025 */
1026 inline int32_t indexOf(UChar32 c,
1027 int32_t start,
1028 int32_t length) const;
1029
1030 /**
1031 * Locate in this the last occurrence of the characters in <TT>text</TT>,
1032 * using bitwise comparison.
1033 * @param text The text to search for.
1034 * @return The offset into this of the start of <TT>text</TT>,
1035 * or -1 if not found.
1036 * @stable ICU 2.0
1037 */
1038 inline int32_t lastIndexOf(const UnicodeString& text) const;
1039
1040 /**
1041 * Locate in this the last occurrence of the characters in <TT>text</TT>
1042 * starting at offset <TT>start</TT>, using bitwise comparison.
1043 * @param text The text to search for.
1044 * @param start The offset at which searching will start.
1045 * @return The offset into this of the start of <TT>text</TT>,
1046 * or -1 if not found.
1047 * @stable ICU 2.0
1048 */
1049 inline int32_t lastIndexOf(const UnicodeString& text,
1050 int32_t start) const;
1051
1052 /**
1053 * Locate in this the last occurrence in the range
1054 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1055 * in <TT>text</TT>, using bitwise comparison.
1056 * @param text The text to search for.
1057 * @param start The offset at which searching will start.
1058 * @param length The number of characters to search
1059 * @return The offset into this of the start of <TT>text</TT>,
1060 * or -1 if not found.
1061 * @stable ICU 2.0
1062 */
1063 inline int32_t lastIndexOf(const UnicodeString& text,
1064 int32_t start,
1065 int32_t length) const;
1066
1067 /**
1068 * Locate in this the last occurrence in the range
1069 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1070 * in <TT>srcText</TT> in the range
1071 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1072 * using bitwise comparison.
1073 * @param srcText The text to search for.
1074 * @param srcStart the offset into <TT>srcText</TT> at which
1075 * to start matching
1076 * @param srcLength the number of characters in <TT>srcText</TT> to match
1077 * @param start the offset into this at which to start matching
1078 * @param length the number of characters in this to search
1079 * @return The offset into this of the start of the match for <TT>srcText</TT>,
1080 * or -1 if not found.
1081 * @stable ICU 2.0
1082 */
1083 inline int32_t lastIndexOf(const UnicodeString& srcText,
1084 int32_t srcStart,
1085 int32_t srcLength,
1086 int32_t start,
1087 int32_t length) const;
1088
1089 /**
1090 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1091 * starting at offset <TT>start</TT>, using bitwise comparison.
1092 * @param srcChars The text to search for.
1093 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1094 * @param start the offset into this at which to start matching
1095 * @return The offset into this of the start of <TT>srcChars</TT>,
1096 * or -1 if not found.
1097 * @stable ICU 2.0
1098 */
1099 inline int32_t lastIndexOf(const UChar *srcChars,
1100 int32_t srcLength,
1101 int32_t start) const;
1102
1103 /**
1104 * Locate in this the last occurrence in the range
1105 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1106 * in <TT>srcChars</TT>, using bitwise comparison.
1107 * @param srcChars The text to search for.
1108 * @param srcLength the number of characters in <TT>srcChars</TT>
1109 * @param start The offset at which searching will start.
1110 * @param length The number of characters to search
1111 * @return The offset into this of the start of <TT>srcChars</TT>,
1112 * or -1 if not found.
1113 * @stable ICU 2.0
1114 */
1115 inline int32_t lastIndexOf(const UChar *srcChars,
1116 int32_t srcLength,
1117 int32_t start,
1118 int32_t length) const;
1119
1120 /**
1121 * Locate in this the last occurrence in the range
1122 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1123 * in <TT>srcChars</TT> in the range
1124 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1125 * using bitwise comparison.
1126 * @param srcChars The text to search for.
1127 * @param srcStart the offset into <TT>srcChars</TT> at which
1128 * to start matching
1129 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1130 * @param start the offset into this at which to start matching
1131 * @param length the number of characters in this to search
1132 * @return The offset into this of the start of the match for <TT>srcChars</TT>,
1133 * or -1 if not found.
1134 * @stable ICU 2.0
1135 */
1136 int32_t lastIndexOf(const UChar *srcChars,
1137 int32_t srcStart,
1138 int32_t srcLength,
1139 int32_t start,
1140 int32_t length) const;
1141
1142 /**
1143 * Locate in this the last occurrence of the BMP code point <code>c</code>,
1144 * using bitwise comparison.
1145 * @param c The code unit to search for.
1146 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1147 * @stable ICU 2.0
1148 */
1149 inline int32_t lastIndexOf(UChar c) const;
1150
1151 /**
1152 * Locate in this the last occurrence of the code point <TT>c</TT>,
1153 * using bitwise comparison.
1154 *
1155 * @param c The code point to search for.
1156 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1157 * @stable ICU 2.0
1158 */
1159 inline int32_t lastIndexOf(UChar32 c) const;
1160
1161 /**
1162 * Locate in this the last occurrence of the BMP code point <code>c</code>
1163 * starting at offset <TT>start</TT>, using bitwise comparison.
1164 * @param c The code unit to search for.
1165 * @param start The offset at which searching will start.
1166 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1167 * @stable ICU 2.0
1168 */
1169 inline int32_t lastIndexOf(UChar c,
1170 int32_t start) const;
1171
1172 /**
1173 * Locate in this the last occurrence of the code point <TT>c</TT>
1174 * starting at offset <TT>start</TT>, using bitwise comparison.
1175 *
1176 * @param c The code point to search for.
1177 * @param start The offset at which searching will start.
1178 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1179 * @stable ICU 2.0
1180 */
1181 inline int32_t lastIndexOf(UChar32 c,
1182 int32_t start) const;
1183
1184 /**
1185 * Locate in this the last occurrence of the BMP code point <code>c</code>
1186 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1187 * using bitwise comparison.
1188 * @param c The code unit to search for.
1189 * @param start the offset into this at which to start matching
1190 * @param length the number of characters in this to search
1191 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1192 * @stable ICU 2.0
1193 */
1194 inline int32_t lastIndexOf(UChar c,
1195 int32_t start,
1196 int32_t length) const;
1197
1198 /**
1199 * Locate in this the last occurrence of the code point <TT>c</TT>
1200 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1201 * using bitwise comparison.
1202 *
1203 * @param c The code point to search for.
1204 * @param start the offset into this at which to start matching
1205 * @param length the number of characters in this to search
1206 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1207 * @stable ICU 2.0
1208 */
1209 inline int32_t lastIndexOf(UChar32 c,
1210 int32_t start,
1211 int32_t length) const;
1212
1213
1214 /* Character access */
1215
1216 /**
1217 * Return the code unit at offset <tt>offset</tt>.
1218 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1219 * @param offset a valid offset into the text
1220 * @return the code unit at offset <tt>offset</tt>
1221 * or 0xffff if the offset is not valid for this string
1222 * @stable ICU 2.0
1223 */
1224 inline UChar charAt(int32_t offset) const;
1225
1226 /**
1227 * Return the code unit at offset <tt>offset</tt>.
1228 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1229 * @param offset a valid offset into the text
1230 * @return the code unit at offset <tt>offset</tt>
1231 * @stable ICU 2.0
1232 */
1233 inline UChar operator[] (int32_t offset) const;
1234
1235 /**
1236 * Return the code point that contains the code unit
1237 * at offset <tt>offset</tt>.
1238 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1239 * @param offset a valid offset into the text
1240 * that indicates the text offset of any of the code units
1241 * that will be assembled into a code point (21-bit value) and returned
1242 * @return the code point of text at <tt>offset</tt>
1243 * or 0xffff if the offset is not valid for this string
1244 * @stable ICU 2.0
1245 */
1246 inline UChar32 char32At(int32_t offset) const;
1247
1248 /**
1249 * Adjust a random-access offset so that
1250 * it points to the beginning of a Unicode character.
1251 * The offset that is passed in points to
1252 * any code unit of a code point,
1253 * while the returned offset will point to the first code unit
1254 * of the same code point.
1255 * In UTF-16, if the input offset points to a second surrogate
1256 * of a surrogate pair, then the returned offset will point
1257 * to the first surrogate.
1258 * @param offset a valid offset into one code point of the text
1259 * @return offset of the first code unit of the same code point
1260 * @see U16_SET_CP_START
1261 * @stable ICU 2.0
1262 */
1263 inline int32_t getChar32Start(int32_t offset) const;
1264
1265 /**
1266 * Adjust a random-access offset so that
1267 * it points behind a Unicode character.
1268 * The offset that is passed in points behind
1269 * any code unit of a code point,
1270 * while the returned offset will point behind the last code unit
1271 * of the same code point.
1272 * In UTF-16, if the input offset points behind the first surrogate
1273 * (i.e., to the second surrogate)
1274 * of a surrogate pair, then the returned offset will point
1275 * behind the second surrogate (i.e., to the first code unit after the pair).
1276 * @param offset a valid offset after any code unit of a code point of the text
1277 * @return offset of the first code unit after the same code point
1278 * @see U16_SET_CP_LIMIT
1279 * @stable ICU 2.0
1280 */
1281 inline int32_t getChar32Limit(int32_t offset) const;
1282
1283 /**
1284 * Move the code unit index along the string by delta code points.
1285 * Interpret the input index as a code unit-based offset into the string,
1286 * move the index forward or backward by delta code points, and
1287 * return the resulting index.
1288 * The input index should point to the first code unit of a code point,
1289 * if there is more than one.
1290 *
1291 * Both input and output indexes are code unit-based as for all
1292 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1293 * If delta<0 then the index is moved backward (toward the start of the string).
1294 * If delta>0 then the index is moved forward (toward the end of the string).
1295 *
1296 * This behaves like CharacterIterator::move32(delta, kCurrent).
1297 *
1298 * Behavior for out-of-bounds indexes:
1299 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1300 * if the input index<0 then it is pinned to 0;
1301 * if it is index>length() then it is pinned to length().
1302 * Afterwards, the index is moved by <code>delta</code> code points
1303 * forward or backward,
1304 * but no further backward than to 0 and no further forward than to length().
1305 * The resulting index return value will be in between 0 and length(), inclusively.
1306 *
1307 * Examples:
1308 * <pre>
1309 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1310 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1311 *
1312 * // initial index: position of U+10000
1313 * int32_t index=1;
1314 *
1315 * // the following examples will all result in index==4, position of U+10ffff
1316 *
1317 * // skip 2 code points from some position in the string
1318 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1319 *
1320 * // go to the 3rd code point from the start of s (0-based)
1321 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1322 *
1323 * // go to the next-to-last code point of s
1324 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1325 * </pre>
1326 *
1327 * @param index input code unit index
1328 * @param delta (signed) code point count to move the index forward or backward
1329 * in the string
1330 * @return the resulting code unit index
1331 * @stable ICU 2.0
1332 */
1333 int32_t moveIndex32(int32_t index, int32_t delta) const;
1334
1335 /* Substring extraction */
1336
1337 /**
1338 * Copy the characters in the range
1339 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1340 * beginning at <tt>dstStart</tt>.
1341 * If the string aliases to <code>dst</code> itself as an external buffer,
1342 * then extract() will not copy the contents.
1343 *
1344 * @param start offset of first character which will be copied into the array
1345 * @param length the number of characters to extract
1346 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1347 * must be at least (<tt>dstStart + length</tt>).
1348 * @param dstStart the offset in <TT>dst</TT> where the first character
1349 * will be extracted
1350 * @stable ICU 2.0
1351 */
1352 inline void extract(int32_t start,
1353 int32_t length,
1354 UChar *dst,
1355 int32_t dstStart = 0) const;
1356
1357 /**
1358 * Copy the contents of the string into dest.
1359 * This is a convenience function that
1360 * checks if there is enough space in dest,
1361 * extracts the entire string if possible,
1362 * and NUL-terminates dest if possible.
1363 *
1364 * If the string fits into dest but cannot be NUL-terminated
1365 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1366 * If the string itself does not fit into dest
1367 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1368 *
1369 * If the string aliases to <code>dest</code> itself as an external buffer,
1370 * then extract() will not copy the contents.
1371 *
1372 * @param dest Destination string buffer.
1373 * @param destCapacity Number of UChars available at dest.
1374 * @param errorCode ICU error code.
1375 * @return length()
1376 * @stable ICU 2.0
1377 */
1378 int32_t
1379 extract(UChar *dest, int32_t destCapacity,
1380 UErrorCode &errorCode) const;
1381
1382 /**
1383 * Copy the characters in the range
1384 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1385 * <tt>target</tt>.
1386 * @param start offset of first character which will be copied
1387 * @param length the number of characters to extract
1388 * @param target UnicodeString into which to copy characters.
1389 * Nothing is returned; the result is delivered in <TT>target</TT>.
1390 * @stable ICU 2.0
1391 */
1392 inline void extract(int32_t start,
1393 int32_t length,
1394 UnicodeString& target) const;
1395
1396 /**
1397 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1398 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1399 * @param start offset of first character which will be copied into the array
1400 * @param limit offset immediately following the last character to be copied
1401 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1402 * must be at least (<tt>dstStart + (limit - start)</tt>).
1403 * @param dstStart the offset in <TT>dst</TT> where the first character
1404 * will be extracted
1405 * @stable ICU 2.0
1406 */
1407 inline void extractBetween(int32_t start,
1408 int32_t limit,
1409 UChar *dst,
1410 int32_t dstStart = 0) const;
1411
1412 /**
1413 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1414 * into the UnicodeString <tt>target</tt>. Replaceable API.
1415 * @param start offset of first character which will be copied
1416 * @param limit offset immediately following the last character to be copied
1417 * @param target UnicodeString into which to copy characters.
1418 * Nothing is returned; the result is delivered in <TT>target</TT>.
1419 * @stable ICU 2.0
1420 */
1421 virtual void extractBetween(int32_t start,
1422 int32_t limit,
1423 UnicodeString& target) const;
1424
1425 /**
1426 * Copy the characters in the range
1427 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1428 * All characters must be invariant (see utypes.h).
1429 * Use US_INV as the last, signature-distinguishing parameter.
1430 *
1431 * This function does not write any more than <code>targetCapacity</code>
1432 * characters but returns the length of the entire output string
1433 * so that one can allocate a larger buffer and call the function again
1434 * if necessary.
1435 * The output string is NUL-terminated if possible.
1436 *
1437 * @param start offset of first character which will be copied
1438 * @param startLength the number of characters to extract
1439 * @param target the target buffer for extraction, can be NULL
1440 * if targetCapacity is 0
1441 * @param targetCapacity the length of the target buffer
1442 * @param inv Signature-distinguishing parameter, use US_INV.
1443 * @return the output string length, not including the terminating NUL
1444 * @stable ICU 3.2
1445 */
1446 int32_t extract(int32_t start,
1447 int32_t startLength,
1448 char *target,
1449 int32_t targetCapacity,
1450 enum EInvariant inv) const;
1451
1452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1453
1454 /**
1455 * Copy the characters in the range
1456 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1457 * in the platform's default codepage.
1458 * This function does not write any more than <code>targetLength</code>
1459 * characters but returns the length of the entire output string
1460 * so that one can allocate a larger buffer and call the function again
1461 * if necessary.
1462 * The output string is NUL-terminated if possible.
1463 *
1464 * @param start offset of first character which will be copied
1465 * @param startLength the number of characters to extract
1466 * @param target the target buffer for extraction
1467 * @param targetLength the length of the target buffer
1468 * If <TT>target</TT> is NULL, then the number of bytes required for
1469 * <TT>target</TT> is returned.
1470 * @return the output string length, not including the terminating NUL
1471 * @stable ICU 2.0
1472 */
1473 int32_t extract(int32_t start,
1474 int32_t startLength,
1475 char *target,
1476 uint32_t targetLength) const;
1477
1478 #endif
1479
1480 #if !UCONFIG_NO_CONVERSION
1481
1482 /**
1483 * Copy the characters in the range
1484 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1485 * in a specified codepage.
1486 * The output string is NUL-terminated.
1487 *
1488 * Recommendation: For invariant-character strings use
1489 * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1490 * because it avoids object code dependencies of UnicodeString on
1491 * the conversion code.
1492 *
1493 * @param start offset of first character which will be copied
1494 * @param startLength the number of characters to extract
1495 * @param target the target buffer for extraction
1496 * @param codepage the desired codepage for the characters. 0 has
1497 * the special meaning of the default codepage
1498 * If <code>codepage</code> is an empty string (<code>""</code>),
1499 * then a simple conversion is performed on the codepage-invariant
1500 * subset ("invariant characters") of the platform encoding. See utypes.h.
1501 * If <TT>target</TT> is NULL, then the number of bytes required for
1502 * <TT>target</TT> is returned. It is assumed that the target is big enough
1503 * to fit all of the characters.
1504 * @return the output string length, not including the terminating NUL
1505 * @stable ICU 2.0
1506 */
1507 inline int32_t extract(int32_t start,
1508 int32_t startLength,
1509 char *target,
1510 const char *codepage = 0) const;
1511
1512 /**
1513 * Copy the characters in the range
1514 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1515 * in a specified codepage.
1516 * This function does not write any more than <code>targetLength</code>
1517 * characters but returns the length of the entire output string
1518 * so that one can allocate a larger buffer and call the function again
1519 * if necessary.
1520 * The output string is NUL-terminated if possible.
1521 *
1522 * Recommendation: For invariant-character strings use
1523 * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1524 * because it avoids object code dependencies of UnicodeString on
1525 * the conversion code.
1526 *
1527 * @param start offset of first character which will be copied
1528 * @param startLength the number of characters to extract
1529 * @param target the target buffer for extraction
1530 * @param targetLength the length of the target buffer
1531 * @param codepage the desired codepage for the characters. 0 has
1532 * the special meaning of the default codepage
1533 * If <code>codepage</code> is an empty string (<code>""</code>),
1534 * then a simple conversion is performed on the codepage-invariant
1535 * subset ("invariant characters") of the platform encoding. See utypes.h.
1536 * If <TT>target</TT> is NULL, then the number of bytes required for
1537 * <TT>target</TT> is returned.
1538 * @return the output string length, not including the terminating NUL
1539 * @stable ICU 2.0
1540 */
1541 int32_t extract(int32_t start,
1542 int32_t startLength,
1543 char *target,
1544 uint32_t targetLength,
1545 const char *codepage) const;
1546
1547 /**
1548 * Convert the UnicodeString into a codepage string using an existing UConverter.
1549 * The output string is NUL-terminated if possible.
1550 *
1551 * This function avoids the overhead of opening and closing a converter if
1552 * multiple strings are extracted.
1553 *
1554 * @param dest destination string buffer, can be NULL if destCapacity==0
1555 * @param destCapacity the number of chars available at dest
1556 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1557 * or NULL for the default converter
1558 * @param errorCode normal ICU error code
1559 * @return the length of the output string, not counting the terminating NUL;
1560 * if the length is greater than destCapacity, then the string will not fit
1561 * and a buffer of the indicated length would need to be passed in
1562 * @stable ICU 2.0
1563 */
1564 int32_t extract(char *dest, int32_t destCapacity,
1565 UConverter *cnv,
1566 UErrorCode &errorCode) const;
1567
1568 #endif
1569
1570 /**
1571 * Create a temporary substring for the specified range.
1572 * Unlike the substring constructor and setTo() functions,
1573 * the object returned here will be a read-only alias (using getBuffer())
1574 * rather than copying the text.
1575 * As a result, this substring operation is much faster but requires
1576 * that the original string not be modified or deleted during the lifetime
1577 * of the returned substring object.
1578 * @param start offset of the first character visible in the substring
1579 * @param length length of the substring
1580 * @return a read-only alias UnicodeString object for the substring
1581 * @stable ICU 4.4
1582 */
1583 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1584
1585 /**
1586 * Create a temporary substring for the specified range.
1587 * Same as tempSubString(start, length) except that the substring range
1588 * is specified as a (start, limit) pair (with an exclusive limit index)
1589 * rather than a (start, length) pair.
1590 * @param start offset of the first character visible in the substring
1591 * @param limit offset immediately following the last character visible in the substring
1592 * @return a read-only alias UnicodeString object for the substring
1593 * @stable ICU 4.4
1594 */
1595 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1596
1597 /**
1598 * Convert the UnicodeString to UTF-8 and write the result
1599 * to a ByteSink. This is called by toUTF8String().
1600 * Unpaired surrogates are replaced with U+FFFD.
1601 * Calls u_strToUTF8WithSub().
1602 *
1603 * @param sink A ByteSink to which the UTF-8 version of the string is written.
1604 * sink.Flush() is called at the end.
1605 * @stable ICU 4.2
1606 * @see toUTF8String
1607 */
1608 void toUTF8(ByteSink &sink) const;
1609
1610 #if U_HAVE_STD_STRING
1611
1612 /**
1613 * Convert the UnicodeString to UTF-8 and append the result
1614 * to a standard string.
1615 * Unpaired surrogates are replaced with U+FFFD.
1616 * Calls toUTF8() with a StringByteSink constructed on <code>result</code>.
1617 *
1618 * @param result A standard string (or a compatible object)
1619 * to which the UTF-8 version of the string is appended.
1620 * @return A reference to <code>result</code>, the string object that was passed in.
1621 * @stable ICU 4.2
1622 * @see toUTF8
1623 */
1624 template<typename StringClass>
1625 StringClass &toUTF8String(StringClass &result) const {
1626 StringByteSink<StringClass> sbs(&result); // sink that forwards the UTF-8 output into result
1627 toUTF8(sbs);
1628 return result;
1629 }
1630
1631 #endif
1632
1633 /**
1634 * Convert the UnicodeString to UTF-32.
1635 * Unpaired surrogates are replaced with U+FFFD.
1636 * Calls u_strToUTF32WithSub().
1637 *
1638 * @param utf32 destination string buffer, can be NULL if capacity==0
1639 * @param capacity the number of UChar32s available at utf32
1640 * @param errorCode Standard ICU error code. Its input value must
1641 * pass the U_SUCCESS() test, or else the function returns
1642 * immediately. Check for U_FAILURE() on output or use with
1643 * function chaining. (See User Guide for details.)
1644 * @return The length of the UTF-32 string.
1645 * @see fromUTF32
1646 * @stable ICU 4.2
1647 */
1648 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1649
1650 /* Length operations */
1651
1652 /**
1653 * Return the length of the UnicodeString object.
1654 * The length is the number of UChar code units in the UnicodeString.
1655 * If you want the number of code points, please use countChar32().
1656 * @return the length of the UnicodeString object
1657 * @see countChar32
1658 * @stable ICU 2.0
1659 */
1660 inline int32_t length(void) const;
1661
1662 /**
1663 * Count Unicode code points in the length UChar code units of the string.
1664 * A code point may occupy either one or two UChar code units.
1665 * Counting code points involves reading all code units.
1666 *
1667 * This function is basically the inverse of moveIndex32().
1668 *
1669 * @param start the index of the first code unit to check
1670 * @param length the number of UChar code units to check
1671 * @return the number of code points in the specified code units
1672 * @see length
1673 * @stable ICU 2.0
1674 */
1675 int32_t
1676 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1677
1678 /**
1679 * Check if the length UChar code units of the string
1680 * contain more Unicode code points than a certain number.
1681 * This is more efficient than counting all code points in this part of the string
1682 * and comparing that number with a threshold.
1683 * This function may not need to scan the string at all if the length
1684 * falls within a certain range, and
1685 * never needs to count more than 'number+1' code points.
1686 * Logically equivalent to (countChar32(start, length)>number).
1687 * A Unicode code point may occupy either one or two UChar code units.
1688 *
1689 * @param start the index of the first code unit to check (0 for the entire string)
1690 * @param length the number of UChar code units to check
1691 * (use INT32_MAX for the entire string; remember that start/length
1692 * values are pinned)
1693 * @param number The number of code points in the (sub)string is compared against
1694 * the 'number' parameter.
1695 * @return Boolean value for whether the string contains more Unicode code points
1696 * than 'number'. Same as (u_countChar32(s, length)>number).
1697 * @see countChar32
1698 * @see u_strHasMoreChar32Than
1699 * @stable ICU 2.4
1700 */
1701 UBool
1702 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1703
1704 /**
1705 * Determine if this string is empty.
1706 * @return TRUE if this string contains 0 characters, FALSE otherwise.
1707 * @stable ICU 2.0
1708 */
1709 inline UBool isEmpty(void) const;
1710
1711 /**
1712 * Return the capacity of the internal buffer of the UnicodeString object.
1713 * This is useful together with the getBuffer functions.
1714 * See there for details.
1715 *
1716 * @return the number of UChars available in the internal buffer
1717 * @see getBuffer
1718 * @stable ICU 2.0
1719 */
1720 inline int32_t getCapacity(void) const;
1721
1722 /* Other operations */
1723
1724 /**
1725 * Generate a hash code for this object.
1726 * @return The hash code of this UnicodeString.
1727 * @stable ICU 2.0
1728 */
1729 inline int32_t hashCode(void) const;
1730
1731 /**
1732 * Determine if this object is bogus, that is, does not contain a valid string.
1733 * A bogus string has no value. It is different from an empty string,
1734 * although in both cases isEmpty() returns TRUE and length() returns 0.
1735 * setToBogus() and isBogus() can be used to indicate that no string value is available.
1736 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1737 * length() returns 0.
1738 *
1739 * @return TRUE if the string is bogus (invalid), FALSE otherwise
1740 * @see setToBogus()
1741 * @stable ICU 2.0
1742 */
1743 inline UBool isBogus(void) const;
1744
1745
1746 //========================================
1747 // Write operations
1748 //========================================
1749
1750 /* Assignment operations */
1751
1752 /**
1753 * Assignment operator. Replace the characters in this UnicodeString
1754 * with the characters from <TT>srcText</TT>.
1755 * @param srcText The text containing the characters to replace
1756 * @return a reference to this
1757 * @stable ICU 2.0
1758 */
1759 UnicodeString &operator=(const UnicodeString &srcText);
1760
1761 /**
1762 * Almost the same as the assignment operator.
1763 * Replace the characters in this UnicodeString
1764 * with the characters from <code>srcText</code>.
1765 *
1766 * This function works the same for all strings except for ones that
1767 * are readonly aliases.
1768 * Starting with ICU 2.4, the assignment operator and the copy constructor
1769 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1770 * This function implements the old, more efficient but less safe behavior
1771 * of making this string also a readonly alias to the same buffer.
1772 * The fastCopyFrom function must be used only if it is known that the lifetime of
1773 * this UnicodeString is at least as long as the lifetime of the aliased buffer
1774 * including its contents, for example for strings from resource bundles
1775 * or aliases to string contents.
1776 *
1777 * @param src The text containing the characters to replace.
1778 * @return a reference to this
1779 * @stable ICU 2.4
1780 */
1781 UnicodeString &fastCopyFrom(const UnicodeString &src);
1782
1783 /**
1784 * Assignment operator. Replace the characters in this UnicodeString
1785 * with the code unit <TT>ch</TT>.
1786 * @param ch the code unit to replace
1787 * @return a reference to this
1788 * @stable ICU 2.0
1789 */
1790 inline UnicodeString& operator= (UChar ch);
1791
1792 /**
1793 * Assignment operator. Replace the characters in this UnicodeString
1794 * with the code point <TT>ch</TT>.
1795 * @param ch the code point to replace
1796 * @return a reference to this
1797 * @stable ICU 2.0
1798 */
1799 inline UnicodeString& operator= (UChar32 ch);
1800
1801 /**
1802 * Set the text in the UnicodeString object to the characters
1803 * in <TT>srcText</TT> in the range
1804 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1805 * <TT>srcText</TT> is not modified.
1806 * @param srcText the source for the new characters
1807 * @param srcStart the offset into <TT>srcText</TT> where new characters
1808 * will be obtained
1809 * @return a reference to this
1810 * @stable ICU 2.2
1811 */
1812 inline UnicodeString& setTo(const UnicodeString& srcText,
1813 int32_t srcStart);
1814
1815 /**
1816 * Set the text in the UnicodeString object to the characters
1817 * in <TT>srcText</TT> in the range
1818 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1819 * <TT>srcText</TT> is not modified.
1820 * @param srcText the source for the new characters
1821 * @param srcStart the offset into <TT>srcText</TT> where new characters
1822 * will be obtained
1823 * @param srcLength the number of characters in <TT>srcText</TT> in the
1824 * replace string.
1825 * @return a reference to this
1826 * @stable ICU 2.0
1827 */
1828 inline UnicodeString& setTo(const UnicodeString& srcText,
1829 int32_t srcStart,
1830 int32_t srcLength);
1831
1832 /**
1833 * Set the text in the UnicodeString object to the characters in
1834 * <TT>srcText</TT>.
1835 * <TT>srcText</TT> is not modified.
1836 * @param srcText the source for the new characters
1837 * @return a reference to this
1838 * @stable ICU 2.0
1839 */
1840 inline UnicodeString& setTo(const UnicodeString& srcText);
1841
1842 /**
1843 * Set the characters in the UnicodeString object to the characters
1844 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1845 * @param srcChars the source for the new characters
1846 * @param srcLength the number of Unicode characters in srcChars.
1847 * @return a reference to this
1848 * @stable ICU 2.0
1849 */
1850 inline UnicodeString& setTo(const UChar *srcChars,
1851 int32_t srcLength);
1852
1853 /**
1854 * Set the characters in the UnicodeString object to the code unit
1855 * <TT>srcChar</TT>.
1856 * @param srcChar the code unit which becomes the UnicodeString's character
1857 * content
1858 * @return a reference to this
1859 * @stable ICU 2.0
1860 */
1861 UnicodeString& setTo(UChar srcChar);
1862
1863 /**
1864 * Set the characters in the UnicodeString object to the code point
1865 * <TT>srcChar</TT>.
1866 * @param srcChar the code point which becomes the UnicodeString's character
1867 * content
1868 * @return a reference to this
1869 * @stable ICU 2.0
1870 */
1871 UnicodeString& setTo(UChar32 srcChar);
1872
1873 /**
1874 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1875 * The text will be used for the UnicodeString object, but
1876 * it will not be released when the UnicodeString is destroyed.
1877 * This has copy-on-write semantics:
1878 * When the string is modified, then the buffer is first copied into
1879 * newly allocated memory.
1880 * The aliased buffer is never modified.
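1881 * In an assignment to another UnicodeString with the assignment operator or the copy constructor, the text is copied (since ICU 2.4).
1882 * Only fastCopyFrom() aliases the text again, so that both strings then alias the same readonly-text.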
1883 *
1884 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1885 * This must be true if <code>textLength==-1</code>.
1886 * @param text The characters to alias for the UnicodeString.
1887 * @param textLength The number of Unicode characters in <code>text</code> to alias.
1888 * If -1, then this function will determine the length
1889 * by calling <code>u_strlen()</code>.
1890 * @return a reference to this
1891 * @stable ICU 2.0
1892 */
1893 UnicodeString &setTo(UBool isTerminated,
1894 const UChar *text,
1895 int32_t textLength);
1896
1897 /**
1898 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1899 * The text will be used for the UnicodeString object, but
1900 * it will not be released when the UnicodeString is destroyed.
1901 * This has write-through semantics:
1902 * For as long as the capacity of the buffer is sufficient, write operations
1903 * will directly affect the buffer. When more capacity is necessary, then
1904 * a new buffer will be allocated and the contents copied as with regularly
1905 * constructed strings.
1906 * In an assignment to another UnicodeString, the buffer will be copied.
1907 * The extract(UChar *dst) function detects whether the dst pointer is the same
1908 * as the string buffer itself and will in this case not copy the contents.
1909 *
1910 * @param buffer The characters to alias for the UnicodeString.
1911 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1912 * @param buffCapacity The size of <code>buffer</code> in UChars.
1913 * @return a reference to this
1914 * @stable ICU 2.0
1915 */
1916 UnicodeString &setTo(UChar *buffer,
1917 int32_t buffLength,
1918 int32_t buffCapacity);
1919
1920 /**
1921 * Make this UnicodeString object invalid.
1922 * The string will test TRUE with isBogus().
1923 *
1924 * A bogus string has no value. It is different from an empty string.
1925 * It can be used to indicate that no string value is available.
1926 * getBuffer() and getTerminatedBuffer() return NULL, and
1927 * length() returns 0.
1928 *
1929 * This utility function is used throughout the UnicodeString
1930 * implementation to indicate that a UnicodeString operation failed,
1931 * and may be used in other functions,
1932 * especially but not exclusively when such functions do not
1933 * take a UErrorCode for simplicity.
1934 *
1935 * The following methods, and no others, will clear a string object's bogus flag:
1936 * - remove()
1937 * - remove(0, INT32_MAX)
1938 * - truncate(0)
1939 * - operator=() (assignment operator)
1940 * - setTo(...)
1941 *
1942 * The simplest way to turn a bogus string into an empty one
1943 * is to use the remove() function.
1944 * Examples for other functions that are equivalent to "set to empty string":
1945 * \code
1946 * if(s.isBogus()) {
1947 * s.remove(); // set to an empty string (remove all), or
1948 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1949 * s.truncate(0); // set to an empty string (complete truncation), or
1950 * s=UnicodeString(); // assign an empty string, or
1951 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1952 * static const UChar nul=0;
1953 * s.setTo(&nul, 0); // set to an empty C Unicode string
1954 * }
1955 * \endcode
1956 *
1957 * @see isBogus()
1958 * @stable ICU 2.0
1959 */
1960 void setToBogus();
1961
1962 /**
1963 * Set the character at the specified offset to the specified character.
1964 * @param offset A valid offset into the text of the character to set
1965 * @param ch The new character
1966 * @return A reference to this
1967 * @stable ICU 2.0
1968 */
1969 UnicodeString& setCharAt(int32_t offset,
1970 UChar ch);
1971
1972
1973 /* Append operations */
1974
1975 /**
1976 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1977 * object.
1978 * @param ch the code unit to be appended
1979 * @return a reference to this
1980 * @stable ICU 2.0
1981 */
1982 inline UnicodeString& operator+= (UChar ch);
1983
1984 /**
1985 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1986 * object.
1987 * @param ch the code point to be appended
1988 * @return a reference to this
1989 * @stable ICU 2.0
1990 */
1991 inline UnicodeString& operator+= (UChar32 ch);
1992
1993 /**
1994 * Append operator. Append the characters in <TT>srcText</TT> to the
1995 * UnicodeString object. <TT>srcText</TT> is not modified.
1996 * @param srcText the source for the new characters
1997 * @return a reference to this
1998 * @stable ICU 2.0
1999 */
2000 inline UnicodeString& operator+= (const UnicodeString& srcText);
2001
2002 /**
2003 * Append the characters
2004 * in <TT>srcText</TT> in the range
2005 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2006 * UnicodeString object. <TT>srcText</TT>
2007 * is not modified.
2008 * @param srcText the source for the new characters
2009 * @param srcStart the offset into <TT>srcText</TT> where new characters
2010 * will be obtained
2011 * @param srcLength the number of characters in <TT>srcText</TT> in
2012 * the append string
2013 * @return a reference to this
2014 * @stable ICU 2.0
2015 */
2016 inline UnicodeString& append(const UnicodeString& srcText,
2017 int32_t srcStart,
2018 int32_t srcLength);
2019
2020 /**
2021 * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2022 * <TT>srcText</TT> is not modified.
2023 * @param srcText the source for the new characters
2024 * @return a reference to this
2025 * @stable ICU 2.0
2026 */
2027 inline UnicodeString& append(const UnicodeString& srcText);
2028
2029 /**
2030 * Append the characters in <TT>srcChars</TT> in the range
2031 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2032 * object.
2033 * <TT>srcChars</TT> is not modified.
2034 * @param srcChars the source for the new characters
2035 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2036 * will be obtained
2037 * @param srcLength the number of characters in <TT>srcChars</TT> in
2038 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2039 * @return a reference to this
2040 * @stable ICU 2.0
2041 */
2042 inline UnicodeString& append(const UChar *srcChars,
2043 int32_t srcStart,
2044 int32_t srcLength);
2045
2046 /**
2047 * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2048 * <TT>srcChars</TT> is not modified.
2049 * @param srcChars the source for the new characters
2050 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2051 * can be -1 if <TT>srcChars</TT> is NUL-terminated
2052 * @return a reference to this
2053 * @stable ICU 2.0
2054 */
2055 inline UnicodeString& append(const UChar *srcChars,
2056 int32_t srcLength);
2057
2058 /**
2059 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2060 * @param srcChar the code unit to append
2061 * @return a reference to this
2062 * @stable ICU 2.0
2063 */
2064 inline UnicodeString& append(UChar srcChar);
2065
2066 /**
2067 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2068 * @param srcChar the code point to append
2069 * @return a reference to this
2070 * @stable ICU 2.0
2071 */
2072 inline UnicodeString& append(UChar32 srcChar);
2073
2074
2075 /* Insert operations */
2076
2077 /**
2078 * Insert the characters in <TT>srcText</TT> in the range
2079 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2080 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2081 * @param start the offset where the insertion begins
2082 * @param srcText the source for the new characters
2083 * @param srcStart the offset into <TT>srcText</TT> where new characters
2084 * will be obtained
2085 * @param srcLength the number of characters in <TT>srcText</TT> in
2086 * the insert string
2087 * @return a reference to this
2088 * @stable ICU 2.0
2089 */
2090 inline UnicodeString& insert(int32_t start,
2091 const UnicodeString& srcText,
2092 int32_t srcStart,
2093 int32_t srcLength);
2094
2095 /**
2096 * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2097 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2098 * @param start the offset where the insertion begins
2099 * @param srcText the source for the new characters
2100 * @return a reference to this
2101 * @stable ICU 2.0
2102 */
2103 inline UnicodeString& insert(int32_t start,
2104 const UnicodeString& srcText);
2105
2106 /**
2107 * Insert the characters in <TT>srcChars</TT> in the range
2108 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2109 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2110 * @param start the offset at which the insertion begins
2111 * @param srcChars the source for the new characters
2112 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2113 * will be obtained
2114 * @param srcLength the number of characters in <TT>srcChars</TT>
2115 * in the insert string
2116 * @return a reference to this
2117 * @stable ICU 2.0
2118 */
2119 inline UnicodeString& insert(int32_t start,
2120 const UChar *srcChars,
2121 int32_t srcStart,
2122 int32_t srcLength);
2123
2124 /**
2125 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2126 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2127 * @param start the offset where the insertion begins
2128 * @param srcChars the source for the new characters
2129 * @param srcLength the number of Unicode characters in srcChars.
2130 * @return a reference to this
2131 * @stable ICU 2.0
2132 */
2133 inline UnicodeString& insert(int32_t start,
2134 const UChar *srcChars,
2135 int32_t srcLength);
2136
2137 /**
2138 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2139 * offset <TT>start</TT>.
2140 * @param start the offset at which the insertion occurs
2141 * @param srcChar the code unit to insert
2142 * @return a reference to this
2143 * @stable ICU 2.0
2144 */
2145 inline UnicodeString& insert(int32_t start,
2146 UChar srcChar);
2147
2148 /**
2149 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2150 * offset <TT>start</TT>.
2151 * @param start the offset at which the insertion occurs
2152 * @param srcChar the code point to insert
2153 * @return a reference to this
2154 * @stable ICU 2.0
2155 */
2156 inline UnicodeString& insert(int32_t start,
2157 UChar32 srcChar);
2158
2159
2160 /* Replace operations */
2161
2162 /**
2163 * Replace the characters in the range
2164 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2165 * <TT>srcText</TT> in the range
2166 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2167 * <TT>srcText</TT> is not modified.
2168 * @param start the offset at which the replace operation begins
2169 * @param length the number of characters to replace. The character at
2170 * <TT>start + length</TT> is not modified.
2171 * @param srcText the source for the new characters
2172 * @param srcStart the offset into <TT>srcText</TT> where new characters
2173 * will be obtained
2174 * @param srcLength the number of characters in <TT>srcText</TT> in
2175 * the replace string
2176 * @return a reference to this
2177 * @stable ICU 2.0
2178 */
2179 UnicodeString& replace(int32_t start,
2180 int32_t length,
2181 const UnicodeString& srcText,
2182 int32_t srcStart,
2183 int32_t srcLength);
2184
2185 /**
2186 * Replace the characters in the range
2187 * [<TT>start</TT>, <TT>start + length</TT>)
2188 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2189 * not modified.
2190 * @param start the offset at which the replace operation begins
2191 * @param length the number of characters to replace. The character at
2192 * <TT>start + length</TT> is not modified.
2193 * @param srcText the source for the new characters
2194 * @return a reference to this
2195 * @stable ICU 2.0
2196 */
2197 UnicodeString& replace(int32_t start,
2198 int32_t length,
2199 const UnicodeString& srcText);
2200
2201 /**
2202 * Replace the characters in the range
2203 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2204 * <TT>srcChars</TT> in the range
2205 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2206 * is not modified.
2207 * @param start the offset at which the replace operation begins
2208 * @param length the number of characters to replace. The character at
2209 * <TT>start + length</TT> is not modified.
2210 * @param srcChars the source for the new characters
2211 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2212 * will be obtained
2213 * @param srcLength the number of characters in <TT>srcChars</TT>
2214 * in the replace string
2215 * @return a reference to this
2216 * @stable ICU 2.0
2217 */
2218 UnicodeString& replace(int32_t start,
2219 int32_t length,
2220 const UChar *srcChars,
2221 int32_t srcStart,
2222 int32_t srcLength);
2223
2224 /**
2225 * Replace the characters in the range
2226 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2227 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2228 * @param start the offset at which the replace operation begins
2229 * @param length number of characters to replace. The character at
2230 * <TT>start + length</TT> is not modified.
2231 * @param srcChars the source for the new characters
2232 * @param srcLength the number of Unicode characters in srcChars
2233 * @return a reference to this
2234 * @stable ICU 2.0
2235 */
2236 inline UnicodeString& replace(int32_t start,
2237 int32_t length,
2238 const UChar *srcChars,
2239 int32_t srcLength);
2240
2241 /**
2242 * Replace the characters in the range
2243 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2244 * <TT>srcChar</TT>.
2245 * @param start the offset at which the replace operation begins
2246 * @param length the number of characters to replace. The character at
2247 * <TT>start + length</TT> is not modified.
2248 * @param srcChar the new code unit
2249 * @return a reference to this
2250 * @stable ICU 2.0
2251 */
2252 inline UnicodeString& replace(int32_t start,
2253 int32_t length,
2254 UChar srcChar);
2255
2256 /**
2257 * Replace the characters in the range
2258 * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2259 * <TT>srcChar</TT>.
2260 * @param start the offset at which the replace operation begins
2261 * @param length the number of characters to replace. The character at
2262 * <TT>start + length</TT> is not modified.
2263 * @param srcChar the new code point
2264 * @return a reference to this
2265 * @stable ICU 2.0
2266 */
2267 inline UnicodeString& replace(int32_t start,
2268 int32_t length,
2269 UChar32 srcChar);
2270
2271 /**
2272 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2273 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2274 * @param start the offset at which the replace operation begins
2275 * @param limit the offset immediately following the replace range
2276 * @param srcText the source for the new characters
2277 * @return a reference to this
2278 * @stable ICU 2.0
2279 */
2280 inline UnicodeString& replaceBetween(int32_t start,
2281 int32_t limit,
2282 const UnicodeString& srcText);
2283
2284 /**
2285 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2286 * with the characters in <TT>srcText</TT> in the range
2287 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2288 * @param start the offset at which the replace operation begins
2289 * @param limit the offset immediately following the replace range
2290 * @param srcText the source for the new characters
2291 * @param srcStart the offset into <TT>srcText</TT> where new characters
2292 * will be obtained
2293 * @param srcLimit the offset immediately following the range to copy
2294 * in <TT>srcText</TT>
2295 * @return a reference to this
2296 * @stable ICU 2.0
2297 */
2298 inline UnicodeString& replaceBetween(int32_t start,
2299 int32_t limit,
2300 const UnicodeString& srcText,
2301 int32_t srcStart,
2302 int32_t srcLimit);
2303
2304 /**
2305 * Replace a substring of this object with the given text.
2306 * @param start the beginning index, inclusive; <code>0 <= start
2307 * <= limit</code>.
2308 * @param limit the ending index, exclusive; <code>start <= limit
2309 * <= length()</code>.
2310 * @param text the text to replace characters <code>start</code>
2311 * to <code>limit - 1</code>
2312 * @stable ICU 2.0
2313 */
2314 virtual void handleReplaceBetween(int32_t start,
2315 int32_t limit,
2316 const UnicodeString& text);
2317
2318 /**
2319 * Replaceable API
2320 * @return TRUE if it has MetaData
2321 * @stable ICU 2.4
2322 */
2323 virtual UBool hasMetaData() const;
2324
2325 /**
2326 * Copy a substring of this object, retaining attribute (out-of-band)
2327 * information. This method is used to duplicate or reorder substrings.
2328 * The destination index must not overlap the source range.
2329 *
2330 * @param start the beginning index, inclusive; <code>0 <= start <=
2331 * limit</code>.
2332 * @param limit the ending index, exclusive; <code>start <= limit <=
2333 * length()</code>.
2334 * @param dest the destination index. The characters from
2335 * <code>start..limit-1</code> will be copied to <code>dest</code>.
2336 * Implementations of this method may assume that <code>dest <= start ||
2337 * dest >= limit</code>.
2338 * @stable ICU 2.0
2339 */
2340 virtual void copy(int32_t start, int32_t limit, int32_t dest);
2341
2342 /* Search and replace operations */
2343
2344 /**
2345 * Replace all occurrences of characters in oldText with the characters
2346 * in newText
2347 * @param oldText the text containing the search text
2348 * @param newText the text containing the replacement text
2349 * @return a reference to this
2350 * @stable ICU 2.0
2351 */
2352 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2353 const UnicodeString& newText);
2354
2355 /**
2356 * Replace all occurrences of characters in oldText with characters
2357 * in newText
2358 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2359 * @param start the start of the range in which replace will be performed
2360 * @param length the length of the range in which replace will be performed
2361 * @param oldText the text containing the search text
2362 * @param newText the text containing the replacement text
2363 * @return a reference to this
2364 * @stable ICU 2.0
2365 */
2366 inline UnicodeString& findAndReplace(int32_t start,
2367 int32_t length,
2368 const UnicodeString& oldText,
2369 const UnicodeString& newText);
2370
2371 /**
2372 * Replace all occurrences of characters in oldText in the range
2373 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2374 * in newText in the range
2375 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2376 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2377 * @param start the start of the range in which replace will be performed
2378 * @param length the length of the range in which replace will be performed
2379 * @param oldText the text containing the search text
2380 * @param oldStart the start of the search range in <TT>oldText</TT>
2381 * @param oldLength the length of the search range in <TT>oldText</TT>
2382 * @param newText the text containing the replacement text
2383 * @param newStart the start of the replacement range in <TT>newText</TT>
2384 * @param newLength the length of the replacement range in <TT>newText</TT>
2385 * @return a reference to this
2386 * @stable ICU 2.0
2387 */
2388 UnicodeString& findAndReplace(int32_t start,
2389 int32_t length,
2390 const UnicodeString& oldText,
2391 int32_t oldStart,
2392 int32_t oldLength,
2393 const UnicodeString& newText,
2394 int32_t newStart,
2395 int32_t newLength);
2396
2397
2398 /* Remove operations */
2399
2400 /**
2401 * Remove all characters from the UnicodeString object.
2402 * @return a reference to this
2403 * @stable ICU 2.0
2404 */
2405 inline UnicodeString& remove(void);
2406
2407 /**
2408 * Remove the characters in the range
2409 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2410 * @param start the offset of the first character to remove
2411 * @param length the number of characters to remove; defaults to INT32_MAX
2412 * @return a reference to this
2413 * @stable ICU 2.0
2414 */
2415 inline UnicodeString& remove(int32_t start,
2416 int32_t length = (int32_t)INT32_MAX);
2417
2418 /**
2419 * Remove the characters in the range
2420 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2421 * @param start the offset of the first character to remove
2422 * @param limit the offset immediately following the range to remove;
 * defaults to INT32_MAX
2423 * @return a reference to this
2424 * @stable ICU 2.0
2425 */
2426 inline UnicodeString& removeBetween(int32_t start,
2427 int32_t limit = (int32_t)INT32_MAX);
2428
2429 /**
2430 * Retain only the characters in the range
2431 * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2432 * Removes characters before <code>start</code> and at and after <code>limit</code>.
2433 * @param start the offset of the first character to retain
2434 * @param limit the offset immediately following the range to retain;
 * defaults to INT32_MAX
2435 * @return a reference to this
2436 * @stable ICU 4.4
2437 */
2438 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2439
2440 /* Length operations */
2441
2442 /**
2443 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2444 * If the length of this UnicodeString is less than targetLength,
2445 * targetLength - length() copies of padChar will be added to the
2446 * beginning of this UnicodeString.
2447 * @param targetLength the desired length of the string
2448 * @param padChar the character to use for padding. Defaults to
2449 * space (U+0020)
2450 * @return TRUE if the text was padded, FALSE otherwise.
2451 * @stable ICU 2.0
2452 */
2453 UBool padLeading(int32_t targetLength,
2454 UChar padChar = 0x0020);
2455
2456 /**
2457 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2458 * If the length of this UnicodeString is less than targetLength,
2459 * targetLength - length() copies of padChar will be added to the
2460 * end of this UnicodeString.
2461 * @param targetLength the desired length of the string
2462 * @param padChar the character to use for padding. Defaults to
2463 * space (U+0020)
2464 * @return TRUE if the text was padded, FALSE otherwise.
2465 * @stable ICU 2.0
2466 */
2467 UBool padTrailing(int32_t targetLength,
2468 UChar padChar = 0x0020);
2469
2470 /**
2471 * Truncate this UnicodeString to the <TT>targetLength</TT>.
2472 * @param targetLength the desired length of this UnicodeString.
2473 * @return TRUE if the text was truncated, FALSE otherwise
2474 * @stable ICU 2.0
2475 */
2476 inline UBool truncate(int32_t targetLength);
2477
2478 /**
2479 * Trims leading and trailing whitespace from this UnicodeString.
2480 * @return a reference to this
2481 * @stable ICU 2.0
2482 */
2483 UnicodeString& trim(void);
2484
2485
2486 /* Miscellaneous operations */
2487
2488 /**
2489 * Reverse this UnicodeString in place.
2490 * @return a reference to this
2491 * @stable ICU 2.0
2492 */
2493 inline UnicodeString& reverse(void);
2494
2495 /**
2496 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2497 * this UnicodeString.
2498 * @param start the start of the range to reverse
2499 * @param length the number of characters to reverse
2500 * @return a reference to this
2501 * @stable ICU 2.0
2502 */
2503 inline UnicodeString& reverse(int32_t start,
2504 int32_t length);
2505
2506 /**
2507 * Convert the characters in this to UPPER CASE following the conventions of
2508 * the default locale.
2509 * @return A reference to this.
2510 * @stable ICU 2.0
2511 */
2512 UnicodeString& toUpper(void);
2513
2514 /**
2515 * Convert the characters in this to UPPER CASE following the conventions of
2516 * a specific locale.
2517 * @param locale The locale containing the conventions to use.
2518 * @return A reference to this.
2519 * @stable ICU 2.0
2520 */
2521 UnicodeString& toUpper(const Locale& locale);
2522
2523 /**
2524 * Convert the characters in this to lower case following the conventions of
2525 * the default locale.
2526 * @return A reference to this.
2527 * @stable ICU 2.0
2528 */
2529 UnicodeString& toLower(void);
2530
2531 /**
2532 * Convert the characters in this to lower case following the conventions of
2533 * a specific locale.
2534 * @param locale The locale containing the conventions to use.
2535 * @return A reference to this.
2536 * @stable ICU 2.0
2537 */
2538 UnicodeString& toLower(const Locale& locale);
2539
2540 #if !UCONFIG_NO_BREAK_ITERATION
2541
2542 /**
2543 * Titlecase this string, convenience function using the default locale.
2544 *
2545 * Casing is locale-dependent and context-sensitive.
2546 * Titlecasing uses a break iterator to find the first characters of words
2547 * that are to be titlecased. It titlecases those characters and lowercases
2548 * all others.
2549 *
2550 * The titlecase break iterator can be provided to customize for arbitrary
2551 * styles, using rules and dictionaries beyond the standard iterators.
2552 * It may be more efficient to always provide an iterator to avoid
2553 * opening and closing one for each string.
2554 * The standard titlecase iterator for the root locale implements the
2555 * algorithm of Unicode TR 21.
2556 *
2557 * This function uses only the setText(), first() and next() methods of the
2558 * provided break iterator.
2559 *
2560 * @param titleIter A break iterator to find the first characters of words
2561 * that are to be titlecased.
2562 * If none is provided (0), then a standard titlecase
2563 * break iterator is opened.
2564 * Otherwise the provided iterator is set to the string's text.
2565 * @return A reference to this.
2566 * @stable ICU 2.1
2567 */
2568 UnicodeString &toTitle(BreakIterator *titleIter);
2569
2570 /**
2571 * Titlecase this string.
2572 *
2573 * Casing is locale-dependent and context-sensitive.
2574 * Titlecasing uses a break iterator to find the first characters of words
2575 * that are to be titlecased. It titlecases those characters and lowercases
2576 * all others.
2577 *
2578 * The titlecase break iterator can be provided to customize for arbitrary
2579 * styles, using rules and dictionaries beyond the standard iterators.
2580 * It may be more efficient to always provide an iterator to avoid
2581 * opening and closing one for each string.
2582 * The standard titlecase iterator for the root locale implements the
2583 * algorithm of Unicode TR 21.
2584 *
2585 * This function uses only the setText(), first() and next() methods of the
2586 * provided break iterator.
2587 *
2588 * @param titleIter A break iterator to find the first characters of words
2589 * that are to be titlecased.
2590 * If none is provided (0), then a standard titlecase
2591 * break iterator is opened.
2592 * Otherwise the provided iterator is set to the string's text.
2593 * @param locale The locale to consider.
2594 * @return A reference to this.
2595 * @stable ICU 2.1
2596 */
2597 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2598
2599 /**
2600 * Titlecase this string, with options.
2601 *
2602 * Casing is locale-dependent and context-sensitive.
2603 * Titlecasing uses a break iterator to find the first characters of words
2604 * that are to be titlecased. It titlecases those characters and lowercases
2605 * all others. (This can be modified with options.)
2606 *
2607 * The titlecase break iterator can be provided to customize for arbitrary
2608 * styles, using rules and dictionaries beyond the standard iterators.
2609 * It may be more efficient to always provide an iterator to avoid
2610 * opening and closing one for each string.
2611 * The standard titlecase iterator for the root locale implements the
2612 * algorithm of Unicode TR 21.
2613 *
2614 * This function uses only the setText(), first() and next() methods of the
2615 * provided break iterator.
2616 *
2617 * @param titleIter A break iterator to find the first characters of words
2618 * that are to be titlecased.
2619 * If none is provided (0), then a standard titlecase
2620 * break iterator is opened.
2621 * Otherwise the provided iterator is set to the string's text.
2622 * @param locale The locale to consider.
2623 * @param options Options bit set, see ucasemap_open().
2624 * @return A reference to this.
2625 * @see U_TITLECASE_NO_LOWERCASE
2626 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2627 * @see ucasemap_open
2628 * @stable ICU 3.8
2629 */
2630 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2631
2632 #endif
2633
2634 /**
2635 * Case-fold the characters in this string.
2636 * Case-folding is locale-independent and not context-sensitive,
2637 * but there is an option for whether to include or exclude mappings for dotted I
2638 * and dotless i that are marked with 'I' in CaseFolding.txt.
2639 * The result may be longer or shorter than the original.
2640 *
2641 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I;
 * defaults to 0, which the declaration annotates as U_FOLD_CASE_DEFAULT
2642 * @return A reference to this.
2643 * @stable ICU 2.0
2644 */
2645 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2646
2647 //========================================
2648 // Access to the internal buffer
2649 //========================================
2650
2651 /**
2652 * Get a read/write pointer to the internal buffer.
2653 * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2654 * writable, and is still owned by the UnicodeString object.
2655 * Calls to getBuffer(minCapacity) must not be nested, and
2656 * must be matched with calls to releaseBuffer(newLength).
2657 * If the string buffer was read-only or shared,
2658 * then it will be reallocated and copied.
2659 *
2660 * An attempted nested call will return 0, and will not further modify the
2661 * state of the UnicodeString object.
2662 * It also returns 0 if the string is bogus.
2663 *
2664 * The actual capacity of the string buffer may be larger than minCapacity.
2665 * getCapacity() returns the actual capacity.
2666 * For many operations, the full capacity should be used to avoid reallocations.
2667 *
2668 * While the buffer is "open" between getBuffer(minCapacity)
2669 * and releaseBuffer(newLength), the following applies:
2670 * - The string length is set to 0.
2671 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2672 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2673 * - You can read from and write to the returned buffer.
2674 * - The previous string contents will still be in the buffer;
2675 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2676 * If the length() was greater than minCapacity, then any contents after minCapacity
2677 * may be lost.
2678 * The buffer contents is not NUL-terminated by getBuffer().
2679 * If length()<getCapacity() then you can terminate it by writing a NUL
2680 * at index length().
2681 * - You must call releaseBuffer(newLength) in order to
2682 * return to normal UnicodeString operation.
2683 *
2684 * @param minCapacity the minimum number of UChars that are to be available
2685 * in the buffer, starting at the returned pointer;
2686 * defaults to the current string capacity if minCapacity==-1
2687 * @return a writable pointer to the internal string buffer,
2688 * or 0 if an error occurs (nested calls, bogus string, out of memory)
2689 *
2690 * @see releaseBuffer
2691 * @see getTerminatedBuffer()
2692 * @stable ICU 2.0
2693 */
2694 UChar *getBuffer(int32_t minCapacity);
2695
2696 /**
2697 * Release a read/write buffer on a UnicodeString object with an
2698 * "open" getBuffer(minCapacity).
2699 * This function must be called in a matched pair with getBuffer(minCapacity).
2700 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2701 *
2702 * It will set the string length to newLength, at most to the current capacity.
2703 * If newLength==-1 then it will set the length according to the
2704 * first NUL in the buffer, or to the capacity if there is no NUL.
2705 *
2706 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2707 *
2708 * @param newLength the new length of the UnicodeString object;
2709 * defaults to the current capacity if newLength is greater than that;
2710 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2711 * the current capacity of the string
2712 *
2713 * @see getBuffer(int32_t minCapacity)
2714 * @stable ICU 2.0
2715 */
2716 void releaseBuffer(int32_t newLength=-1);
2717
2718 /**
2719 * Get a read-only pointer to the internal buffer.
2720 * This can be called at any time on a valid UnicodeString.
2721 *
2722 * It returns 0 if the string is bogus, or
2723 * during an "open" getBuffer(minCapacity).
2724 *
2725 * It can be called as many times as desired.
2726 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2727 * at which time the pointer is semantically invalidated and must not be used any more.
2728 *
2729 * The capacity of the buffer can be determined with getCapacity().
2730 * The part after length() may or may not be initialized and valid,
2731 * depending on the history of the UnicodeString object.
2732 *
2733 * The buffer contents is (probably) not NUL-terminated.
2734 * You can check if it is with
2735 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2736 * (See getTerminatedBuffer().)
2737 *
2738 * The buffer may reside in read-only memory. Its contents must not
2739 * be modified.
2740 *
2741 * @return a read-only pointer to the internal string buffer,
2742 * or 0 if the string is empty or bogus, or during an
 * "open" getBuffer(minCapacity)
2743 *
2744 * @see getBuffer(int32_t minCapacity)
2745 * @see getTerminatedBuffer()
2746 * @stable ICU 2.0
2747 */
2748 inline const UChar *getBuffer() const;
2749
2750 /**
2751 * Get a read-only pointer to the internal buffer,
2752 * making sure that it is NUL-terminated.
2753 * This can be called at any time on a valid UnicodeString.
2754 *
2755 * It returns 0 if the string is bogus, or
2756 * during an "open" getBuffer(minCapacity), or if the buffer cannot
2757 * be NUL-terminated (because memory allocation failed).
2758 *
2759 * It can be called as many times as desired.
2760 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2761 * at which time the pointer is semantically invalidated and must not be used any more.
2762 *
2763 * The capacity of the buffer can be determined with getCapacity().
2764 * The part after length()+1 may or may not be initialized and valid,
2765 * depending on the history of the UnicodeString object.
2766 *
2767 * The buffer contents is guaranteed to be NUL-terminated.
2768 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2769 * is written.
2770 * For this reason, this function is not const, unlike getBuffer().
2771 * Note that a UnicodeString may also contain NUL characters as part of its contents.
2772 *
2773 * The buffer may reside in read-only memory. Its contents must not
2774 * be modified.
2775 *
2776 * @return a read-only pointer to the internal string buffer,
2777 * or 0 if the string is empty or bogus, during an
 * "open" getBuffer(minCapacity), or if the buffer cannot be
 * NUL-terminated because memory allocation failed
2778 *
2779 * @see getBuffer(int32_t minCapacity)
2780 * @see getBuffer()
2781 * @stable ICU 2.2
2782 */
2783 inline const UChar *getTerminatedBuffer();
2784
2785 //========================================
2786 // Constructors
2787 //========================================
2788
2789 /** Construct an empty UnicodeString.
2790 * @stable ICU 2.0
2791 */
2792 UnicodeString();
2793
2794 /**
2795 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2796 * @param capacity the number of UChars this UnicodeString should hold
2797 * before a resize is necessary; if count is greater than 0 and count
2798 * code points c take up more space than capacity, then capacity is adjusted
2799 * accordingly.
2800 * @param c is used to initially fill the string
2801 * @param count specifies how many code points c are to be written in the
2802 * string
2803 * @stable ICU 2.0
2804 */
2805 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2806
2807 /**
2808 * Single UChar (code unit) constructor.
2809 * @param ch the character to place in the UnicodeString
2810 * @stable ICU 2.0
2811 */
2812 UnicodeString(UChar ch);
2813
2814 /**
2815 * Single UChar32 (code point) constructor.
2816 * @param ch the character to place in the UnicodeString
2817 * @stable ICU 2.0
2818 */
2819 UnicodeString(UChar32 ch);
2820
2821 /**
2822 * UChar* constructor.
2823 * @param text The characters to place in the UnicodeString. <TT>text</TT>
2824 * must be NUL (U+0000) terminated.
2825 * @stable ICU 2.0
2826 */
2827 UnicodeString(const UChar *text);
2828
2829 /**
2830 * UChar* constructor.
2831 * @param text The characters to place in the UnicodeString.
2832 * @param textLength The number of Unicode characters in <TT>text</TT>
2833 * to copy.
2834 * @stable ICU 2.0
2835 */
2836 UnicodeString(const UChar *text,
2837 int32_t textLength);
2838
2839 /**
2840 * Readonly-aliasing UChar* constructor.
2841 * The text will be used for the UnicodeString object, but
2842 * it will not be released when the UnicodeString is destroyed.
2843 * This has copy-on-write semantics:
2844 * When the string is modified, then the buffer is first copied into
2845 * newly allocated memory.
2846 * The aliased buffer is never modified.
2847 * In an assignment to another UnicodeString, the text will be aliased again,
2848 * so that both strings then alias the same readonly-text.
2849 *
2850 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2851 * This must be true if <code>textLength==-1</code>.
2852 * @param text The characters to alias for the UnicodeString.
2853 * @param textLength The number of Unicode characters in <code>text</code> to alias.
2854 * If -1, then this constructor will determine the length
2855 * by calling <code>u_strlen()</code>.
2856 * @stable ICU 2.0
2857 */
2858 UnicodeString(UBool isTerminated,
2859 const UChar *text,
2860 int32_t textLength);
2861
2862 /**
2863 * Writable-aliasing UChar* constructor.
2864 * The text will be used for the UnicodeString object, but
2865 * it will not be released when the UnicodeString is destroyed.
2866 * This has write-through semantics:
2867 * For as long as the capacity of the buffer is sufficient, write operations
2868 * will directly affect the buffer. When more capacity is necessary, then
2869 * a new buffer will be allocated and the contents copied as with regularly
2870 * constructed strings.
2871 * In an assignment to another UnicodeString, the buffer will be copied.
2872 * The extract(UChar *dst) function detects whether the dst pointer is the same
2873 * as the string buffer itself and will in this case not copy the contents.
2874 *
2875 * @param buffer The characters to alias for the UnicodeString.
2876 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2877 * @param buffCapacity The size of <code>buffer</code> in UChars.
2878 * @stable ICU 2.0
2879 */
2880 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2881
2882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2883
2884 /**
2885 * char* constructor.
2886 * @param codepageData an array of bytes, null-terminated,
2887 * in the platform's default codepage.
2888 * @stable ICU 2.0
2889 */
2890 UnicodeString(const char *codepageData);
2891
2892 /**
2893 * char* constructor.
2894 * @param codepageData an array of bytes in the platform's default codepage.
2895 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2896 * @stable ICU 2.0
2897 */
2898 UnicodeString(const char *codepageData, int32_t dataLength);
2899
2900 #endif
2901
2902 #if !UCONFIG_NO_CONVERSION
2903
2904 /**
2905 * char* constructor.
2906 * @param codepageData an array of bytes, null-terminated
2907 * @param codepage the encoding of <TT>codepageData</TT>. The special
2908 * value 0 for <TT>codepage</TT> indicates that the text is in the
2909 * platform's default codepage.
2910 *
2911 * If <code>codepage</code> is an empty string (<code>""</code>),
2912 * then a simple conversion is performed on the codepage-invariant
2913 * subset ("invariant characters") of the platform encoding. See utypes.h.
2914 * Recommendation: For invariant-character strings use the constructor
2915 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2916 * because it avoids object code dependencies of UnicodeString on
2917 * the conversion code.
2918 *
2919 * @stable ICU 2.0
2920 */
2921 UnicodeString(const char *codepageData, const char *codepage);
2922
2923 /**
2924 * char* constructor.
2925 * @param codepageData an array of bytes.
2926 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2927 * @param codepage the encoding of <TT>codepageData</TT>. The special
2928 * value 0 for <TT>codepage</TT> indicates that the text is in the
2929 * platform's default codepage.
2930 * If <code>codepage</code> is an empty string (<code>""</code>),
2931 * then a simple conversion is performed on the codepage-invariant
2932 * subset ("invariant characters") of the platform encoding. See utypes.h.
2933 * Recommendation: For invariant-character strings use the constructor
2934 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2935 * because it avoids object code dependencies of UnicodeString on
2936 * the conversion code.
2937 *
2938 * @stable ICU 2.0
2939 */
2940 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2941
2942 /**
2943 * char * / UConverter constructor.
2944 * This constructor uses an existing UConverter object to
2945 * convert the codepage string to Unicode and construct a UnicodeString
2946 * from that.
2947 *
2948 * The converter is reset at first.
2949 * If the error code indicates a failure before this constructor is called,
2950 * or if an error occurs during conversion or construction,
2951 * then the string will be bogus.
2952 *
2953 * This function avoids the overhead of opening and closing a converter if
2954 * multiple strings are constructed.
2955 *
2956 * @param src input codepage string
2957 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2958 * @param cnv converter object (ucnv_resetToUnicode() will be called),
2959 * can be NULL for the default converter
2960 * @param errorCode normal ICU error code
2961 * @stable ICU 2.0
2962 */
2963 UnicodeString(
2964 const char *src, int32_t srcLength,
2965 UConverter *cnv,
2966 UErrorCode &errorCode);
2967
2968 #endif
2969
2970 /**
2971 * Constructs a Unicode string from an invariant-character char * string.
2972 * About invariant characters see utypes.h.
2973 * This constructor has no runtime dependency on conversion code and is
2974 * therefore recommended over ones taking a charset name string
2975 * (where the empty string "" indicates invariant-character conversion).
2976 *
2977 * Use the macro US_INV as the third, signature-distinguishing parameter.
2978 *
2979 * For example:
2980 * \code
2981 * void fn(const char *s) {
2982 * UnicodeString ustr(s, -1, US_INV);
2983 * // use ustr ...
2984 * }
2985 * \endcode
2986 *
2987 * @param src String using only invariant characters.
2988 * @param length Length of src, or -1 if NUL-terminated.
2989 * @param inv Signature-distinguishing parameter, use US_INV.
2990 *
2991 * @see US_INV
2992 * @stable ICU 3.2
2993 */
2994 UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2995
2996
2997 /**
2998 * Copy constructor.
2999 * @param that The UnicodeString object to copy.
3000 * @stable ICU 2.0
3001 */
3002 UnicodeString(const UnicodeString& that);
3003
3004 /**
3005 * 'Substring' constructor from tail of source string.
3006 * @param src The UnicodeString object to copy.
3007 * @param srcStart The offset into <tt>src</tt> at which to start copying.
3008 * @stable ICU 2.2
3009 */
3010 UnicodeString(const UnicodeString& src, int32_t srcStart);
3011
3012 /**
3013 * 'Substring' constructor from subrange of source string.
3014 * @param src The UnicodeString object to copy.
3015 * @param srcStart The offset into <tt>src</tt> at which to start copying.
3016 * @param srcLength The number of characters from <tt>src</tt> to copy.
3017 * @stable ICU 2.2
3018 */
3019 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3020
3021 /**
3022 * Clone this object, an instance of a subclass of Replaceable.
3023 * Clones can be used concurrently in multiple threads.
3024 * If a subclass does not implement clone(), or if an error occurs,
3025 * then NULL is returned.
3026 * The clone functions in all subclasses return a pointer to a Replaceable
3027 * because some compilers do not support covariant (same-as-this)
3028 * return types; cast to the appropriate subclass if necessary.
3029 * The caller must delete the clone.
3030 *
3031 * @return a clone of this object
3032 *
3033 * @see Replaceable::clone
3034 * @see getDynamicClassID
3035 * @stable ICU 2.6
3036 */
3037 virtual Replaceable *clone() const;
3038
3039 /** Destructor.
3040 * @stable ICU 2.0
3041 */
3042 virtual ~UnicodeString();
3043
3044 /**
3045 * Create a UnicodeString from a UTF-8 string.
3046 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3047 * Calls u_strFromUTF8WithSub().
3048 *
3049 * @param utf8 UTF-8 input string.
3050 * Note that a StringPiece can be implicitly constructed
3051 * from a std::string or a NUL-terminated const char * string.
3052 * @return A UnicodeString with equivalent UTF-16 contents.
3053 * @see toUTF8
3054 * @see toUTF8String
3055 * @stable ICU 4.2
3056 */
3057 static UnicodeString fromUTF8(const StringPiece &utf8);
3058
3059 /**
3060 * Create a UnicodeString from a UTF-32 string.
3061 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3062 * Calls u_strFromUTF32WithSub().
3063 *
3064 * @param utf32 UTF-32 input string. Must not be NULL.
3065 * @param length Length of the input string, or -1 if NUL-terminated.
3066 * @return A UnicodeString with equivalent UTF-16 contents.
3067 * @see toUTF32
3068 * @stable ICU 4.2
3069 */
3070 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3071
3072 /* Miscellaneous operations */
3073
3074 /**
3075 * Unescape a string of characters and return a string containing
3076 * the result. The following escape sequences are recognized:
3077 *
3078 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3079 * \\Uhhhhhhhh 8 hex digits
3080 * \\xhh 1-2 hex digits
3081 * \\ooo 1-3 octal digits; o in [0-7]
3082 * \\cX control-X; X is masked with 0x1F
3083 *
3084 * as well as the standard ANSI C escapes:
3085 *
3086 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3087 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3088 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3089 *
3090 * Anything else following a backslash is generically escaped. For
3091 * example, "[a\\-z]" returns "[a-z]".
3092 *
3093 * If an escape sequence is ill-formed, this method returns an empty
3094 * string. An example of an ill-formed sequence is "\\u" followed by
3095 * fewer than 4 hex digits.
3096 *
3097 * This function is similar to u_unescape() but not identical to it.
3098 * The latter takes a source char*, so it does escape recognition
3099 * and also invariant conversion.
3100 *
3101 * @return a string with backslash escapes interpreted, or an
3102 * empty string on error.
3103 * @see UnicodeString#unescapeAt()
3104 * @see u_unescape()
3105 * @see u_unescapeAt()
3106 * @stable ICU 2.0
3107 */
3108 UnicodeString unescape() const;
3109
3110 /**
3111 * Unescape a single escape sequence and return the represented
3112 * character. See unescape() for a listing of the recognized escape
3113 * sequences. The character at offset-1 is assumed (without
3114 * checking) to be a backslash. If the escape sequence is
3115 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
3116 * returned.
3117 *
3118 * @param offset an input output parameter. On input, it is the
3119 * offset into this string where the escape sequence is located,
3120 * after the initial backslash. On output, it is advanced after the
3121 * last character parsed. On error, it is not advanced at all.
3122 * @return the character represented by the escape sequence at
3123 * offset, or (UChar32)0xFFFFFFFF on error.
3124 * @see UnicodeString#unescape()
3125 * @see u_unescape()
3126 * @see u_unescapeAt()
3127 * @stable ICU 2.0
3128 */
3129 UChar32 unescapeAt(int32_t &offset) const;
3130
3131 /**
3132 * ICU "poor man's RTTI", returns a UClassID for this class.
3133 *
3134 * @stable ICU 2.2
3135 */
3136 static UClassID U_EXPORT2 getStaticClassID();
3137
3138 /**
3139 * ICU "poor man's RTTI", returns a UClassID for the actual class.
3140 *
3141 * @stable ICU 2.2
3142 */
3143 virtual UClassID getDynamicClassID() const;
3144
3145 //========================================
3146 // Implementation methods
3147 //========================================
3148
3149 protected:
3150 /**
3151 * Implement Replaceable::getLength() (see jitterbug 1027).
3152 * @stable ICU 2.4
3153 */
3154 virtual int32_t getLength() const;
3155
3156 /**
3157 * The change in Replaceable to use virtual getCharAt() allows
3158 * UnicodeString::charAt() to be inline again (see jitterbug 709).
3159 * @stable ICU 2.4
3160 */
3161 virtual UChar getCharAt(int32_t offset) const;
3162
3163 /**
3164 * The change in Replaceable to use virtual getChar32At() allows
3165 * UnicodeString::char32At() to be inline again (see jitterbug 709).
3166 * @stable ICU 2.4
3167 */
3168 virtual UChar32 getChar32At(int32_t offset) const;
3169
3170 private:
3171 // For char* constructors. Could be made public.
3172 UnicodeString &setToUTF8(const StringPiece &utf8);
3173 // For extract(char*).
3174 // We could make a toUTF8(target, capacity, errorCode) public but not
3175 // this version: New API will be cleaner if we make callers create substrings
3176 // rather than having start+length on every method,
3177 // and it should take a UErrorCode&.
3178 int32_t
3179 toUTF8(int32_t start, int32_t len,
3180 char *target, int32_t capacity) const;
3181
3182
3183 inline int8_t
3184 doCompare(int32_t start,
3185 int32_t length,
3186 const UnicodeString& srcText,
3187 int32_t srcStart,
3188 int32_t srcLength) const;
3189
3190 int8_t doCompare(int32_t start,
3191 int32_t length,
3192 const UChar *srcChars,
3193 int32_t srcStart,
3194 int32_t srcLength) const;
3195
3196 inline int8_t
3197 doCompareCodePointOrder(int32_t start,
3198 int32_t length,
3199 const UnicodeString& srcText,
3200 int32_t srcStart,
3201 int32_t srcLength) const;
3202
3203 int8_t doCompareCodePointOrder(int32_t start,
3204 int32_t length,
3205 const UChar *srcChars,
3206 int32_t srcStart,
3207 int32_t srcLength) const;
3208
3209 inline int8_t
3210 doCaseCompare(int32_t start,
3211 int32_t length,
3212 const UnicodeString &srcText,
3213 int32_t srcStart,
3214 int32_t srcLength,
3215 uint32_t options) const;
3216
3217 int8_t
3218 doCaseCompare(int32_t start,
3219 int32_t length,
3220 const UChar *srcChars,
3221 int32_t srcStart,
3222 int32_t srcLength,
3223 uint32_t options) const;
3224
3225 int32_t doIndexOf(UChar c,
3226 int32_t start,
3227 int32_t length) const;
3228
3229 int32_t doIndexOf(UChar32 c,
3230 int32_t start,
3231 int32_t length) const;
3232
3233 int32_t doLastIndexOf(UChar c,
3234 int32_t start,
3235 int32_t length) const;
3236
3237 int32_t doLastIndexOf(UChar32 c,
3238 int32_t start,
3239 int32_t length) const;
3240
3241 void doExtract(int32_t start,
3242 int32_t length,
3243 UChar *dst,
3244 int32_t dstStart) const;
3245
3246 inline void doExtract(int32_t start,
3247 int32_t length,
3248 UnicodeString& target) const;
3249
3250 inline UChar doCharAt(int32_t offset) const;
3251
3252 UnicodeString& doReplace(int32_t start,
3253 int32_t length,
3254 const UnicodeString& srcText,
3255 int32_t srcStart,
3256 int32_t srcLength);
3257
3258 UnicodeString& doReplace(int32_t start,
3259 int32_t length,
3260 const UChar *srcChars,
3261 int32_t srcStart,
3262 int32_t srcLength);
3263
3264 UnicodeString& doReverse(int32_t start,
3265 int32_t length);
3266
3267 // calculate hash code
3268 int32_t doHashCode(void) const;
3269
3270 // get pointer to start of array
3271 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3272 inline UChar* getArrayStart(void);
3273 inline const UChar* getArrayStart(void) const;
3274
3275 // A UnicodeString object (not necessarily its current buffer)
3276 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3277 inline UBool isWritable() const;
3278
3279 // Is the current buffer writable?
3280 inline UBool isBufferWritable() const;
3281
3282 // None of the following does releaseArray().
3283 inline void setLength(int32_t len); // sets only fShortLength and fLength
3284 inline void setToEmpty(); // sets fFlags=kShortString
3285 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3286
3287 // allocate the array; result may be fStackBuffer
3288 // sets refCount to 1 if appropriate
3289 // sets fArray, fCapacity, and fFlags
3290 // returns boolean for success or failure
3291 UBool allocate(int32_t capacity);
3292
3293 // release the array if owned
3294 void releaseArray(void);
3295
3296 // turn a bogus string into an empty one
3297 void unBogus();
3298
3299 // implements assigment operator, copy constructor, and fastCopyFrom()
3300 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE);
3301
3302 // Pin start and limit to acceptable values.
3303 inline void pinIndex(int32_t& start) const;
3304 inline void pinIndices(int32_t& start,
3305 int32_t& length) const;
3306
3307 #if !UCONFIG_NO_CONVERSION
3308
3309 /* Internal extract() using UConverter. */
3310 int32_t doExtract(int32_t start, int32_t length,
3311 char *dest, int32_t destCapacity,
3312 UConverter *cnv,
3313 UErrorCode &errorCode) const;
3314
3315 /*
3316 * Real constructor for converting from codepage data.
3317 * It assumes that it is called with !fRefCounted.
3318 *
3319 * If <code>codepage==0</code>, then the default converter
3320 * is used for the platform encoding.
3321 * If <code>codepage</code> is an empty string (<code>""</code>),
3322 * then a simple conversion is performed on the codepage-invariant
3323 * subset ("invariant characters") of the platform encoding. See utypes.h.
3324 */
3325 void doCodepageCreate(const char *codepageData,
3326 int32_t dataLength,
3327 const char *codepage);
3328
3329 /*
3330 * Worker function for creating a UnicodeString from
3331 * a codepage string using a UConverter.
3332 */
3333 void
3334 doCodepageCreate(const char *codepageData,
3335 int32_t dataLength,
3336 UConverter *converter,
3337 UErrorCode &status);
3338
3339 #endif
3340
3341 /*
3342 * This function is called when write access to the array
3343 * is necessary.
3344 *
3345 * We need to make a copy of the array if
3346 * the buffer is read-only, or
3347 * the buffer is refCounted (shared), and refCount>1, or
3348 * the buffer is too small.
3349 *
3350 * Return FALSE if memory could not be allocated.
3351 */
3352 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3353 int32_t growCapacity = -1,
3354 UBool doCopyArray = TRUE,
3355 int32_t **pBufferToDelete = 0,
3356 UBool forceClone = FALSE);
3357
3358 // common function for case mappings
3359 UnicodeString &
3360 caseMap(BreakIterator *titleIter,
3361 const char *locale,
3362 uint32_t options,
3363 int32_t toWhichCase);
3364
3365 // ref counting
3366 void addRef(void);
3367 int32_t removeRef(void);
3368 int32_t refCount(void) const;
3369
// constants
// Private sizing constants, hash-code sentinels, and the bit flags stored in fFlags.
enum {
  // Set the stack buffer size so that sizeof(UnicodeString) is,
  // naturally (without padding), a multiple of sizeof(pointer).
  // 13 UChars are used with 4-byte pointers, 15 UChars otherwise.
  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
  kInvalidUChar=0xffff, // invalid UChar index
  kGrowSize=128, // grow size for this buffer
  kInvalidHashCode=0, // invalid hash code
  kEmptyHashCode=1, // hash code for empty string

  // bit flag values for fFlags
  // (each flag is a distinct power of two so that flags can be OR-ed and tested with &)
  kIsBogus=1, // this string is bogus, i.e., not valid or NULL
  kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
  kRefCounted=4, // there is a refCount field before the characters in fArray
  kBufferIsReadonly=8,// do not write to this buffer
  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
                     // and releaseBuffer(newLength) must be called

  // combined values for convenience
  // (named fFlags states expressed through the individual bits above)
  kShortString=kUsingStackBuffer,
  kLongString=kRefCounted,
  kReadonlyAlias=kBufferIsReadonly,
  kWritableAlias=0
};
3394
3395 friend class StringThreadTest;
3396 friend class UnicodeStringAppendable;
3397
3398 union StackBufferOrFields; // forward declaration necessary before friend declaration
3399 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3400
3401 /*
3402 * The following are all the class fields that are stored
3403 * in each UnicodeString object.
3404 * Note that UnicodeString has virtual functions,
3405 * therefore there is an implicit vtable pointer
3406 * as the first real field.
3407 * The fields should be aligned such that no padding is necessary.
3408 * On 32-bit machines, the size should be 32 bytes,
3409 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3410 *
3411 * We use a hack to achieve this.
3412 *
3413 * With at least some compilers, each of the following is forced to
3414 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3415 * rounded up with additional padding if the fields do not already fit that requirement:
3416 * - sizeof(class UnicodeString)
3417 * - offsetof(UnicodeString, fUnion)
3418 * - sizeof(fUnion)
3419 * - sizeof(fFields)
3420 *
3421 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3422 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3423 * (Padding at the end of fFields is ok:
3424 * As long as there is no padding after fStackBuffer, it is not wasted space.)
3425 *
3426 * We further assume that the compiler does not reorder the fields,
3427 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3428 * with at most some padding (but no other field) in between.
3429 * (Padding there would be wasted space, but functionally harmless.)
3430 *
3431 * We use a few more sizeof(pointer)'s chunks of space with
3432 * fRestOfStackBuffer, fShortLength and fFlags,
3433 * to get up exactly to the intended sizeof(UnicodeString).
3434 */
// (implicit) *vtable;
// Size arithmetic behind the layout comment above, with 2-byte UChars
// (sizeof(fStackBuffer)=16 for 8 UChars):
//   4-byte pointers: 4 (vtable) + 16 (fUnion) + 2*(13-8) + 1 + 1 = 32 bytes
//   8-byte pointers: 8 (vtable) + 16 (fUnion) + 2*(15-8) + 1 + 1 = 40 bytes
union StackBufferOrFields {
  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
  // else fFields is used
  UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
  struct {
    UChar *fArray; // the Unicode data
    int32_t fCapacity; // capacity of fArray (in UChars)
    int32_t fLength; // number of characters in fArray if >127; else undefined
  } fFields;
} fUnion;
// Continuation of the short-string buffer: the remaining US_STACKBUF_SIZE-8 UChars.
UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
uint8_t fFlags; // bit flags: see constants above
3449 };
3450
3451 /**
3452 * Create a new UnicodeString with the concatenation of two others.
3453 *
3454 * @param s1 The first string to be copied to the new one.
3455 * @param s2 The second string to be copied to the new one, after s1.
3456 * @return UnicodeString(s1).append(s2)
3457 * @stable ICU 2.8
3458 */
3459 U_COMMON_API UnicodeString U_EXPORT2
3460 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3461
3462 //========================================
3463 // Inline members
3464 //========================================
3465
3466 //========================================
3467 // Privates
3468 //========================================
3469
3470 inline void
pinIndex(int32_t & start)3471 UnicodeString::pinIndex(int32_t& start) const
3472 {
3473 // pin index
3474 if(start < 0) {
3475 start = 0;
3476 } else if(start > length()) {
3477 start = length();
3478 }
3479 }
3480
3481 inline void
pinIndices(int32_t & start,int32_t & _length)3482 UnicodeString::pinIndices(int32_t& start,
3483 int32_t& _length) const
3484 {
3485 // pin indices
3486 int32_t len = length();
3487 if(start < 0) {
3488 start = 0;
3489 } else if(start > len) {
3490 start = len;
3491 }
3492 if(_length < 0) {
3493 _length = 0;
3494 } else if(_length > (len - start)) {
3495 _length = (len - start);
3496 }
3497 }
3498
3499 inline UChar*
getArrayStart()3500 UnicodeString::getArrayStart()
3501 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3502
3503 inline const UChar*
getArrayStart()3504 UnicodeString::getArrayStart() const
3505 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3506
3507 //========================================
3508 // Read-only implementation methods
3509 //========================================
3510 inline int32_t
length()3511 UnicodeString::length() const
3512 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3513
3514 inline int32_t
getCapacity()3515 UnicodeString::getCapacity() const
3516 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3517
3518 inline int32_t
hashCode()3519 UnicodeString::hashCode() const
3520 { return doHashCode(); }
3521
3522 inline UBool
isBogus()3523 UnicodeString::isBogus() const
3524 { return (UBool)(fFlags & kIsBogus); }
3525
3526 inline UBool
isWritable()3527 UnicodeString::isWritable() const
3528 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3529
3530 inline UBool
isBufferWritable()3531 UnicodeString::isBufferWritable() const
3532 {
3533 return (UBool)(
3534 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3535 (!(fFlags&kRefCounted) || refCount()==1));
3536 }
3537
3538 inline const UChar *
getBuffer()3539 UnicodeString::getBuffer() const {
3540 if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3541 return 0;
3542 } else if(fFlags&kUsingStackBuffer) {
3543 return fUnion.fStackBuffer;
3544 } else {
3545 return fUnion.fFields.fArray;
3546 }
3547 }
3548
3549 //========================================
3550 // Read-only alias methods
3551 //========================================
3552 inline int8_t
doCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3553 UnicodeString::doCompare(int32_t start,
3554 int32_t thisLength,
3555 const UnicodeString& srcText,
3556 int32_t srcStart,
3557 int32_t srcLength) const
3558 {
3559 if(srcText.isBogus()) {
3560 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3561 } else {
3562 srcText.pinIndices(srcStart, srcLength);
3563 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3564 }
3565 }
3566
3567 inline UBool
3568 UnicodeString::operator== (const UnicodeString& text) const
3569 {
3570 if(isBogus()) {
3571 return text.isBogus();
3572 } else {
3573 int32_t len = length(), textLength = text.length();
3574 return
3575 !text.isBogus() &&
3576 len == textLength &&
3577 doCompare(0, len, text, 0, textLength) == 0;
3578 }
3579 }
3580
3581 inline UBool
3582 UnicodeString::operator!= (const UnicodeString& text) const
3583 { return (! operator==(text)); }
3584
3585 inline UBool
3586 UnicodeString::operator> (const UnicodeString& text) const
3587 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3588
3589 inline UBool
3590 UnicodeString::operator< (const UnicodeString& text) const
3591 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3592
3593 inline UBool
3594 UnicodeString::operator>= (const UnicodeString& text) const
3595 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3596
3597 inline UBool
3598 UnicodeString::operator<= (const UnicodeString& text) const
3599 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3600
3601 inline int8_t
compare(const UnicodeString & text)3602 UnicodeString::compare(const UnicodeString& text) const
3603 { return doCompare(0, length(), text, 0, text.length()); }
3604
3605 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText)3606 UnicodeString::compare(int32_t start,
3607 int32_t _length,
3608 const UnicodeString& srcText) const
3609 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3610
3611 inline int8_t
compare(const UChar * srcChars,int32_t srcLength)3612 UnicodeString::compare(const UChar *srcChars,
3613 int32_t srcLength) const
3614 { return doCompare(0, length(), srcChars, 0, srcLength); }
3615
3616 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3617 UnicodeString::compare(int32_t start,
3618 int32_t _length,
3619 const UnicodeString& srcText,
3620 int32_t srcStart,
3621 int32_t srcLength) const
3622 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3623
3624 inline int8_t
compare(int32_t start,int32_t _length,const UChar * srcChars)3625 UnicodeString::compare(int32_t start,
3626 int32_t _length,
3627 const UChar *srcChars) const
3628 { return doCompare(start, _length, srcChars, 0, _length); }
3629
3630 inline int8_t
compare(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)3631 UnicodeString::compare(int32_t start,
3632 int32_t _length,
3633 const UChar *srcChars,
3634 int32_t srcStart,
3635 int32_t srcLength) const
3636 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3637
3638 inline int8_t
compareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)3639 UnicodeString::compareBetween(int32_t start,
3640 int32_t limit,
3641 const UnicodeString& srcText,
3642 int32_t srcStart,
3643 int32_t srcLimit) const
3644 { return doCompare(start, limit - start,
3645 srcText, srcStart, srcLimit - srcStart); }
3646
3647 inline int8_t
doCompareCodePointOrder(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3648 UnicodeString::doCompareCodePointOrder(int32_t start,
3649 int32_t thisLength,
3650 const UnicodeString& srcText,
3651 int32_t srcStart,
3652 int32_t srcLength) const
3653 {
3654 if(srcText.isBogus()) {
3655 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3656 } else {
3657 srcText.pinIndices(srcStart, srcLength);
3658 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3659 }
3660 }
3661
3662 inline int8_t
compareCodePointOrder(const UnicodeString & text)3663 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3664 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3665
3666 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText)3667 UnicodeString::compareCodePointOrder(int32_t start,
3668 int32_t _length,
3669 const UnicodeString& srcText) const
3670 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3671
3672 inline int8_t
compareCodePointOrder(const UChar * srcChars,int32_t srcLength)3673 UnicodeString::compareCodePointOrder(const UChar *srcChars,
3674 int32_t srcLength) const
3675 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3676
3677 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3678 UnicodeString::compareCodePointOrder(int32_t start,
3679 int32_t _length,
3680 const UnicodeString& srcText,
3681 int32_t srcStart,
3682 int32_t srcLength) const
3683 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3684
3685 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UChar * srcChars)3686 UnicodeString::compareCodePointOrder(int32_t start,
3687 int32_t _length,
3688 const UChar *srcChars) const
3689 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3690
3691 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)3692 UnicodeString::compareCodePointOrder(int32_t start,
3693 int32_t _length,
3694 const UChar *srcChars,
3695 int32_t srcStart,
3696 int32_t srcLength) const
3697 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3698
3699 inline int8_t
compareCodePointOrderBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)3700 UnicodeString::compareCodePointOrderBetween(int32_t start,
3701 int32_t limit,
3702 const UnicodeString& srcText,
3703 int32_t srcStart,
3704 int32_t srcLimit) const
3705 { return doCompareCodePointOrder(start, limit - start,
3706 srcText, srcStart, srcLimit - srcStart); }
3707
3708 inline int8_t
doCaseCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)3709 UnicodeString::doCaseCompare(int32_t start,
3710 int32_t thisLength,
3711 const UnicodeString &srcText,
3712 int32_t srcStart,
3713 int32_t srcLength,
3714 uint32_t options) const
3715 {
3716 if(srcText.isBogus()) {
3717 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3718 } else {
3719 srcText.pinIndices(srcStart, srcLength);
3720 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3721 }
3722 }
3723
3724 inline int8_t
caseCompare(const UnicodeString & text,uint32_t options)3725 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3726 return doCaseCompare(0, length(), text, 0, text.length(), options);
3727 }
3728
3729 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,uint32_t options)3730 UnicodeString::caseCompare(int32_t start,
3731 int32_t _length,
3732 const UnicodeString &srcText,
3733 uint32_t options) const {
3734 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3735 }
3736
3737 inline int8_t
caseCompare(const UChar * srcChars,int32_t srcLength,uint32_t options)3738 UnicodeString::caseCompare(const UChar *srcChars,
3739 int32_t srcLength,
3740 uint32_t options) const {
3741 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3742 }
3743
3744 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)3745 UnicodeString::caseCompare(int32_t start,
3746 int32_t _length,
3747 const UnicodeString &srcText,
3748 int32_t srcStart,
3749 int32_t srcLength,
3750 uint32_t options) const {
3751 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3752 }
3753
3754 inline int8_t
caseCompare(int32_t start,int32_t _length,const UChar * srcChars,uint32_t options)3755 UnicodeString::caseCompare(int32_t start,
3756 int32_t _length,
3757 const UChar *srcChars,
3758 uint32_t options) const {
3759 return doCaseCompare(start, _length, srcChars, 0, _length, options);
3760 }
3761
3762 inline int8_t
caseCompare(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options)3763 UnicodeString::caseCompare(int32_t start,
3764 int32_t _length,
3765 const UChar *srcChars,
3766 int32_t srcStart,
3767 int32_t srcLength,
3768 uint32_t options) const {
3769 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3770 }
3771
3772 inline int8_t
caseCompareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit,uint32_t options)3773 UnicodeString::caseCompareBetween(int32_t start,
3774 int32_t limit,
3775 const UnicodeString &srcText,
3776 int32_t srcStart,
3777 int32_t srcLimit,
3778 uint32_t options) const {
3779 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3780 }
3781
3782 inline int32_t
indexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)3783 UnicodeString::indexOf(const UnicodeString& srcText,
3784 int32_t srcStart,
3785 int32_t srcLength,
3786 int32_t start,
3787 int32_t _length) const
3788 {
3789 if(!srcText.isBogus()) {
3790 srcText.pinIndices(srcStart, srcLength);
3791 if(srcLength > 0) {
3792 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3793 }
3794 }
3795 return -1;
3796 }
3797
3798 inline int32_t
indexOf(const UnicodeString & text)3799 UnicodeString::indexOf(const UnicodeString& text) const
3800 { return indexOf(text, 0, text.length(), 0, length()); }
3801
3802 inline int32_t
indexOf(const UnicodeString & text,int32_t start)3803 UnicodeString::indexOf(const UnicodeString& text,
3804 int32_t start) const {
3805 pinIndex(start);
3806 return indexOf(text, 0, text.length(), start, length() - start);
3807 }
3808
3809 inline int32_t
indexOf(const UnicodeString & text,int32_t start,int32_t _length)3810 UnicodeString::indexOf(const UnicodeString& text,
3811 int32_t start,
3812 int32_t _length) const
3813 { return indexOf(text, 0, text.length(), start, _length); }
3814
3815 inline int32_t
indexOf(const UChar * srcChars,int32_t srcLength,int32_t start)3816 UnicodeString::indexOf(const UChar *srcChars,
3817 int32_t srcLength,
3818 int32_t start) const {
3819 pinIndex(start);
3820 return indexOf(srcChars, 0, srcLength, start, length() - start);
3821 }
3822
3823 inline int32_t
indexOf(const UChar * srcChars,int32_t srcLength,int32_t start,int32_t _length)3824 UnicodeString::indexOf(const UChar *srcChars,
3825 int32_t srcLength,
3826 int32_t start,
3827 int32_t _length) const
3828 { return indexOf(srcChars, 0, srcLength, start, _length); }
3829
3830 inline int32_t
indexOf(UChar c,int32_t start,int32_t _length)3831 UnicodeString::indexOf(UChar c,
3832 int32_t start,
3833 int32_t _length) const
3834 { return doIndexOf(c, start, _length); }
3835
3836 inline int32_t
indexOf(UChar32 c,int32_t start,int32_t _length)3837 UnicodeString::indexOf(UChar32 c,
3838 int32_t start,
3839 int32_t _length) const
3840 { return doIndexOf(c, start, _length); }
3841
3842 inline int32_t
indexOf(UChar c)3843 UnicodeString::indexOf(UChar c) const
3844 { return doIndexOf(c, 0, length()); }
3845
3846 inline int32_t
indexOf(UChar32 c)3847 UnicodeString::indexOf(UChar32 c) const
3848 { return indexOf(c, 0, length()); }
3849
3850 inline int32_t
indexOf(UChar c,int32_t start)3851 UnicodeString::indexOf(UChar c,
3852 int32_t start) const {
3853 pinIndex(start);
3854 return doIndexOf(c, start, length() - start);
3855 }
3856
3857 inline int32_t
indexOf(UChar32 c,int32_t start)3858 UnicodeString::indexOf(UChar32 c,
3859 int32_t start) const {
3860 pinIndex(start);
3861 return indexOf(c, start, length() - start);
3862 }
3863
3864 inline int32_t
lastIndexOf(const UChar * srcChars,int32_t srcLength,int32_t start,int32_t _length)3865 UnicodeString::lastIndexOf(const UChar *srcChars,
3866 int32_t srcLength,
3867 int32_t start,
3868 int32_t _length) const
3869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3870
3871 inline int32_t
lastIndexOf(const UChar * srcChars,int32_t srcLength,int32_t start)3872 UnicodeString::lastIndexOf(const UChar *srcChars,
3873 int32_t srcLength,
3874 int32_t start) const {
3875 pinIndex(start);
3876 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3877 }
3878
3879 inline int32_t
lastIndexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)3880 UnicodeString::lastIndexOf(const UnicodeString& srcText,
3881 int32_t srcStart,
3882 int32_t srcLength,
3883 int32_t start,
3884 int32_t _length) const
3885 {
3886 if(!srcText.isBogus()) {
3887 srcText.pinIndices(srcStart, srcLength);
3888 if(srcLength > 0) {
3889 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3890 }
3891 }
3892 return -1;
3893 }
3894
3895 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start,int32_t _length)3896 UnicodeString::lastIndexOf(const UnicodeString& text,
3897 int32_t start,
3898 int32_t _length) const
3899 { return lastIndexOf(text, 0, text.length(), start, _length); }
3900
3901 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start)3902 UnicodeString::lastIndexOf(const UnicodeString& text,
3903 int32_t start) const {
3904 pinIndex(start);
3905 return lastIndexOf(text, 0, text.length(), start, length() - start);
3906 }
3907
3908 inline int32_t
lastIndexOf(const UnicodeString & text)3909 UnicodeString::lastIndexOf(const UnicodeString& text) const
3910 { return lastIndexOf(text, 0, text.length(), 0, length()); }
3911
3912 inline int32_t
lastIndexOf(UChar c,int32_t start,int32_t _length)3913 UnicodeString::lastIndexOf(UChar c,
3914 int32_t start,
3915 int32_t _length) const
3916 { return doLastIndexOf(c, start, _length); }
3917
3918 inline int32_t
lastIndexOf(UChar32 c,int32_t start,int32_t _length)3919 UnicodeString::lastIndexOf(UChar32 c,
3920 int32_t start,
3921 int32_t _length) const {
3922 return doLastIndexOf(c, start, _length);
3923 }
3924
3925 inline int32_t
lastIndexOf(UChar c)3926 UnicodeString::lastIndexOf(UChar c) const
3927 { return doLastIndexOf(c, 0, length()); }
3928
3929 inline int32_t
lastIndexOf(UChar32 c)3930 UnicodeString::lastIndexOf(UChar32 c) const {
3931 return lastIndexOf(c, 0, length());
3932 }
3933
3934 inline int32_t
lastIndexOf(UChar c,int32_t start)3935 UnicodeString::lastIndexOf(UChar c,
3936 int32_t start) const {
3937 pinIndex(start);
3938 return doLastIndexOf(c, start, length() - start);
3939 }
3940
3941 inline int32_t
lastIndexOf(UChar32 c,int32_t start)3942 UnicodeString::lastIndexOf(UChar32 c,
3943 int32_t start) const {
3944 pinIndex(start);
3945 return lastIndexOf(c, start, length() - start);
3946 }
3947
3948 inline UBool
startsWith(const UnicodeString & text)3949 UnicodeString::startsWith(const UnicodeString& text) const
3950 { return compare(0, text.length(), text, 0, text.length()) == 0; }
3951
3952 inline UBool
startsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3953 UnicodeString::startsWith(const UnicodeString& srcText,
3954 int32_t srcStart,
3955 int32_t srcLength) const
3956 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3957
3958 inline UBool
startsWith(const UChar * srcChars,int32_t srcLength)3959 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
3960 if(srcLength < 0) {
3961 srcLength = u_strlen(srcChars);
3962 }
3963 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
3964 }
3965
3966 inline UBool
startsWith(const UChar * srcChars,int32_t srcStart,int32_t srcLength)3967 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
3968 if(srcLength < 0) {
3969 srcLength = u_strlen(srcChars);
3970 }
3971 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
3972 }
3973
3974 inline UBool
endsWith(const UnicodeString & text)3975 UnicodeString::endsWith(const UnicodeString& text) const
3976 { return doCompare(length() - text.length(), text.length(),
3977 text, 0, text.length()) == 0; }
3978
3979 inline UBool
endsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3980 UnicodeString::endsWith(const UnicodeString& srcText,
3981 int32_t srcStart,
3982 int32_t srcLength) const {
3983 srcText.pinIndices(srcStart, srcLength);
3984 return doCompare(length() - srcLength, srcLength,
3985 srcText, srcStart, srcLength) == 0;
3986 }
3987
3988 inline UBool
endsWith(const UChar * srcChars,int32_t srcLength)3989 UnicodeString::endsWith(const UChar *srcChars,
3990 int32_t srcLength) const {
3991 if(srcLength < 0) {
3992 srcLength = u_strlen(srcChars);
3993 }
3994 return doCompare(length() - srcLength, srcLength,
3995 srcChars, 0, srcLength) == 0;
3996 }
3997
3998 inline UBool
endsWith(const UChar * srcChars,int32_t srcStart,int32_t srcLength)3999 UnicodeString::endsWith(const UChar *srcChars,
4000 int32_t srcStart,
4001 int32_t srcLength) const {
4002 if(srcLength < 0) {
4003 srcLength = u_strlen(srcChars + srcStart);
4004 }
4005 return doCompare(length() - srcLength, srcLength,
4006 srcChars, srcStart, srcLength) == 0;
4007 }
4008
4009 //========================================
4010 // replace
4011 //========================================
4012 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText)4013 UnicodeString::replace(int32_t start,
4014 int32_t _length,
4015 const UnicodeString& srcText)
4016 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4017
4018 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4019 UnicodeString::replace(int32_t start,
4020 int32_t _length,
4021 const UnicodeString& srcText,
4022 int32_t srcStart,
4023 int32_t srcLength)
4024 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4025
4026 inline UnicodeString&
replace(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcLength)4027 UnicodeString::replace(int32_t start,
4028 int32_t _length,
4029 const UChar *srcChars,
4030 int32_t srcLength)
4031 { return doReplace(start, _length, srcChars, 0, srcLength); }
4032
4033 inline UnicodeString&
replace(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)4034 UnicodeString::replace(int32_t start,
4035 int32_t _length,
4036 const UChar *srcChars,
4037 int32_t srcStart,
4038 int32_t srcLength)
4039 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4040
4041 inline UnicodeString&
replace(int32_t start,int32_t _length,UChar srcChar)4042 UnicodeString::replace(int32_t start,
4043 int32_t _length,
4044 UChar srcChar)
4045 { return doReplace(start, _length, &srcChar, 0, 1); }
4046
4047 inline UnicodeString&
replace(int32_t start,int32_t _length,UChar32 srcChar)4048 UnicodeString::replace(int32_t start,
4049 int32_t _length,
4050 UChar32 srcChar) {
4051 UChar buffer[U16_MAX_LENGTH];
4052 int32_t count = 0;
4053 UBool isError = FALSE;
4054 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
4055 return doReplace(start, _length, buffer, 0, count);
4056 }
4057
4058 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText)4059 UnicodeString::replaceBetween(int32_t start,
4060 int32_t limit,
4061 const UnicodeString& srcText)
4062 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4063
4064 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4065 UnicodeString::replaceBetween(int32_t start,
4066 int32_t limit,
4067 const UnicodeString& srcText,
4068 int32_t srcStart,
4069 int32_t srcLimit)
4070 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4071
4072 inline UnicodeString&
findAndReplace(const UnicodeString & oldText,const UnicodeString & newText)4073 UnicodeString::findAndReplace(const UnicodeString& oldText,
4074 const UnicodeString& newText)
4075 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4076 newText, 0, newText.length()); }
4077
4078 inline UnicodeString&
findAndReplace(int32_t start,int32_t _length,const UnicodeString & oldText,const UnicodeString & newText)4079 UnicodeString::findAndReplace(int32_t start,
4080 int32_t _length,
4081 const UnicodeString& oldText,
4082 const UnicodeString& newText)
4083 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4084 newText, 0, newText.length()); }
4085
4086 // ============================
4087 // extract
4088 // ============================
4089 inline void
doExtract(int32_t start,int32_t _length,UnicodeString & target)4090 UnicodeString::doExtract(int32_t start,
4091 int32_t _length,
4092 UnicodeString& target) const
4093 { target.replace(0, target.length(), *this, start, _length); }
4094
4095 inline void
extract(int32_t start,int32_t _length,UChar * target,int32_t targetStart)4096 UnicodeString::extract(int32_t start,
4097 int32_t _length,
4098 UChar *target,
4099 int32_t targetStart) const
4100 { doExtract(start, _length, target, targetStart); }
4101
4102 inline void
extract(int32_t start,int32_t _length,UnicodeString & target)4103 UnicodeString::extract(int32_t start,
4104 int32_t _length,
4105 UnicodeString& target) const
4106 { doExtract(start, _length, target); }
4107
4108 #if !UCONFIG_NO_CONVERSION
4109
4110 inline int32_t
extract(int32_t start,int32_t _length,char * dst,const char * codepage)4111 UnicodeString::extract(int32_t start,
4112 int32_t _length,
4113 char *dst,
4114 const char *codepage) const
4115
4116 {
4117 // This dstSize value will be checked explicitly
4118 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4119 }
4120
4121 #endif
4122
4123 inline void
extractBetween(int32_t start,int32_t limit,UChar * dst,int32_t dstStart)4124 UnicodeString::extractBetween(int32_t start,
4125 int32_t limit,
4126 UChar *dst,
4127 int32_t dstStart) const {
4128 pinIndex(start);
4129 pinIndex(limit);
4130 doExtract(start, limit - start, dst, dstStart);
4131 }
4132
4133 inline UnicodeString
tempSubStringBetween(int32_t start,int32_t limit)4134 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4135 return tempSubString(start, limit - start);
4136 }
4137
4138 inline UChar
doCharAt(int32_t offset)4139 UnicodeString::doCharAt(int32_t offset) const
4140 {
4141 if((uint32_t)offset < (uint32_t)length()) {
4142 return getArrayStart()[offset];
4143 } else {
4144 return kInvalidUChar;
4145 }
4146 }
4147
4148 inline UChar
charAt(int32_t offset)4149 UnicodeString::charAt(int32_t offset) const
4150 { return doCharAt(offset); }
4151
4152 inline UChar
4153 UnicodeString::operator[] (int32_t offset) const
4154 { return doCharAt(offset); }
4155
4156 inline UChar32
char32At(int32_t offset)4157 UnicodeString::char32At(int32_t offset) const
4158 {
4159 int32_t len = length();
4160 if((uint32_t)offset < (uint32_t)len) {
4161 const UChar *array = getArrayStart();
4162 UChar32 c;
4163 U16_GET(array, 0, offset, len, c);
4164 return c;
4165 } else {
4166 return kInvalidUChar;
4167 }
4168 }
4169
4170 inline int32_t
getChar32Start(int32_t offset)4171 UnicodeString::getChar32Start(int32_t offset) const {
4172 if((uint32_t)offset < (uint32_t)length()) {
4173 const UChar *array = getArrayStart();
4174 U16_SET_CP_START(array, 0, offset);
4175 return offset;
4176 } else {
4177 return 0;
4178 }
4179 }
4180
4181 inline int32_t
getChar32Limit(int32_t offset)4182 UnicodeString::getChar32Limit(int32_t offset) const {
4183 int32_t len = length();
4184 if((uint32_t)offset < (uint32_t)len) {
4185 const UChar *array = getArrayStart();
4186 U16_SET_CP_LIMIT(array, 0, offset, len);
4187 return offset;
4188 } else {
4189 return len;
4190 }
4191 }
4192
4193 inline UBool
isEmpty()4194 UnicodeString::isEmpty() const {
4195 return fShortLength == 0;
4196 }
4197
4198 //========================================
4199 // Write implementation methods
4200 //========================================
4201 inline void
setLength(int32_t len)4202 UnicodeString::setLength(int32_t len) {
4203 if(len <= 127) {
4204 fShortLength = (int8_t)len;
4205 } else {
4206 fShortLength = (int8_t)-1;
4207 fUnion.fFields.fLength = len;
4208 }
4209 }
4210
4211 inline void
setToEmpty()4212 UnicodeString::setToEmpty() {
4213 fShortLength = 0;
4214 fFlags = kShortString;
4215 }
4216
4217 inline void
setArray(UChar * array,int32_t len,int32_t capacity)4218 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4219 setLength(len);
4220 fUnion.fFields.fArray = array;
4221 fUnion.fFields.fCapacity = capacity;
4222 }
4223
4224 inline const UChar *
getTerminatedBuffer()4225 UnicodeString::getTerminatedBuffer() {
4226 if(!isWritable()) {
4227 return 0;
4228 } else {
4229 UChar *array = getArrayStart();
4230 int32_t len = length();
4231 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4232 /*
4233 * kRefCounted: Do not write the NUL if the buffer is shared.
4234 * That is mostly safe, except when the length of one copy was modified
4235 * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4236 * Then the NUL would be written into the middle of another copy's string.
4237 */
4238 if(!(fFlags&kBufferIsReadonly)) {
4239 /*
4240 * We must not write to a readonly buffer, but it is known to be
4241 * NUL-terminated if len<capacity.
4242 * A shared, allocated buffer (refCount()>1) must not have its contents
4243 * modified, but the NUL at [len] is beyond the string contents,
4244 * and multiple string objects and threads writing the same NUL into the
4245 * same location is harmless.
4246 * In all other cases, the buffer is fully writable and it is anyway safe
4247 * to write the NUL.
4248 *
4249 * Note: An earlier version of this code tested whether there is a NUL
4250 * at [len] already, but, while safe, it generated lots of warnings from
4251 * tools like valgrind and Purify.
4252 */
4253 array[len] = 0;
4254 }
4255 return array;
4256 } else if(cloneArrayIfNeeded(len+1)) {
4257 array = getArrayStart();
4258 array[len] = 0;
4259 return array;
4260 } else {
4261 return 0;
4262 }
4263 }
4264 }
4265
4266 inline UnicodeString&
4267 UnicodeString::operator= (UChar ch)
4268 { return doReplace(0, length(), &ch, 0, 1); }
4269
4270 inline UnicodeString&
4271 UnicodeString::operator= (UChar32 ch)
4272 { return replace(0, length(), ch); }
4273
4274 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4275 UnicodeString::setTo(const UnicodeString& srcText,
4276 int32_t srcStart,
4277 int32_t srcLength)
4278 {
4279 unBogus();
4280 return doReplace(0, length(), srcText, srcStart, srcLength);
4281 }
4282
4283 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart)4284 UnicodeString::setTo(const UnicodeString& srcText,
4285 int32_t srcStart)
4286 {
4287 unBogus();
4288 srcText.pinIndex(srcStart);
4289 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4290 }
4291
4292 inline UnicodeString&
setTo(const UnicodeString & srcText)4293 UnicodeString::setTo(const UnicodeString& srcText)
4294 {
4295 return copyFrom(srcText);
4296 }
4297
4298 inline UnicodeString&
setTo(const UChar * srcChars,int32_t srcLength)4299 UnicodeString::setTo(const UChar *srcChars,
4300 int32_t srcLength)
4301 {
4302 unBogus();
4303 return doReplace(0, length(), srcChars, 0, srcLength);
4304 }
4305
4306 inline UnicodeString&
setTo(UChar srcChar)4307 UnicodeString::setTo(UChar srcChar)
4308 {
4309 unBogus();
4310 return doReplace(0, length(), &srcChar, 0, 1);
4311 }
4312
4313 inline UnicodeString&
setTo(UChar32 srcChar)4314 UnicodeString::setTo(UChar32 srcChar)
4315 {
4316 unBogus();
4317 return replace(0, length(), srcChar);
4318 }
4319
4320 inline UnicodeString&
append(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4321 UnicodeString::append(const UnicodeString& srcText,
4322 int32_t srcStart,
4323 int32_t srcLength)
4324 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4325
4326 inline UnicodeString&
append(const UnicodeString & srcText)4327 UnicodeString::append(const UnicodeString& srcText)
4328 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4329
4330 inline UnicodeString&
append(const UChar * srcChars,int32_t srcStart,int32_t srcLength)4331 UnicodeString::append(const UChar *srcChars,
4332 int32_t srcStart,
4333 int32_t srcLength)
4334 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4335
4336 inline UnicodeString&
append(const UChar * srcChars,int32_t srcLength)4337 UnicodeString::append(const UChar *srcChars,
4338 int32_t srcLength)
4339 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4340
4341 inline UnicodeString&
append(UChar srcChar)4342 UnicodeString::append(UChar srcChar)
4343 { return doReplace(length(), 0, &srcChar, 0, 1); }
4344
4345 inline UnicodeString&
append(UChar32 srcChar)4346 UnicodeString::append(UChar32 srcChar) {
4347 UChar buffer[U16_MAX_LENGTH];
4348 int32_t _length = 0;
4349 UBool isError = FALSE;
4350 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4351 return doReplace(length(), 0, buffer, 0, _length);
4352 }
4353
4354 inline UnicodeString&
4355 UnicodeString::operator+= (UChar ch)
4356 { return doReplace(length(), 0, &ch, 0, 1); }
4357
4358 inline UnicodeString&
4359 UnicodeString::operator+= (UChar32 ch) {
4360 return append(ch);
4361 }
4362
4363 inline UnicodeString&
4364 UnicodeString::operator+= (const UnicodeString& srcText)
4365 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4366
4367 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4368 UnicodeString::insert(int32_t start,
4369 const UnicodeString& srcText,
4370 int32_t srcStart,
4371 int32_t srcLength)
4372 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4373
4374 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText)4375 UnicodeString::insert(int32_t start,
4376 const UnicodeString& srcText)
4377 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4378
4379 inline UnicodeString&
insert(int32_t start,const UChar * srcChars,int32_t srcStart,int32_t srcLength)4380 UnicodeString::insert(int32_t start,
4381 const UChar *srcChars,
4382 int32_t srcStart,
4383 int32_t srcLength)
4384 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4385
4386 inline UnicodeString&
insert(int32_t start,const UChar * srcChars,int32_t srcLength)4387 UnicodeString::insert(int32_t start,
4388 const UChar *srcChars,
4389 int32_t srcLength)
4390 { return doReplace(start, 0, srcChars, 0, srcLength); }
4391
4392 inline UnicodeString&
insert(int32_t start,UChar srcChar)4393 UnicodeString::insert(int32_t start,
4394 UChar srcChar)
4395 { return doReplace(start, 0, &srcChar, 0, 1); }
4396
4397 inline UnicodeString&
insert(int32_t start,UChar32 srcChar)4398 UnicodeString::insert(int32_t start,
4399 UChar32 srcChar)
4400 { return replace(start, 0, srcChar); }
4401
4402
4403 inline UnicodeString&
remove()4404 UnicodeString::remove()
4405 {
4406 // remove() of a bogus string makes the string empty and non-bogus
4407 // we also un-alias a read-only alias to deal with NUL-termination
4408 // issues with getTerminatedBuffer()
4409 if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4410 setToEmpty();
4411 } else {
4412 fShortLength = 0;
4413 }
4414 return *this;
4415 }
4416
4417 inline UnicodeString&
remove(int32_t start,int32_t _length)4418 UnicodeString::remove(int32_t start,
4419 int32_t _length)
4420 {
4421 if(start <= 0 && _length == INT32_MAX) {
4422 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4423 return remove();
4424 }
4425 return doReplace(start, _length, NULL, 0, 0);
4426 }
4427
4428 inline UnicodeString&
removeBetween(int32_t start,int32_t limit)4429 UnicodeString::removeBetween(int32_t start,
4430 int32_t limit)
4431 { return doReplace(start, limit - start, NULL, 0, 0); }
4432
4433 inline UnicodeString &
retainBetween(int32_t start,int32_t limit)4434 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4435 truncate(limit);
4436 return doReplace(0, start, NULL, 0, 0);
4437 }
4438
4439 inline UBool
truncate(int32_t targetLength)4440 UnicodeString::truncate(int32_t targetLength)
4441 {
4442 if(isBogus() && targetLength == 0) {
4443 // truncate(0) of a bogus string makes the string empty and non-bogus
4444 unBogus();
4445 return FALSE;
4446 } else if((uint32_t)targetLength < (uint32_t)length()) {
4447 setLength(targetLength);
4448 if(fFlags&kBufferIsReadonly) {
4449 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
4450 }
4451 return TRUE;
4452 } else {
4453 return FALSE;
4454 }
4455 }
4456
4457 inline UnicodeString&
reverse()4458 UnicodeString::reverse()
4459 { return doReverse(0, length()); }
4460
4461 inline UnicodeString&
reverse(int32_t start,int32_t _length)4462 UnicodeString::reverse(int32_t start,
4463 int32_t _length)
4464 { return doReverse(start, _length); }
4465
4466 U_NAMESPACE_END
4467
4468 #endif
4469