• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  utf16.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999sep09
16 *   created by: Markus W. Scherer
17 */
18 
19 /**
20  * \file
21  * \brief C API: 16-bit Unicode handling macros
22  *
23  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
24  *
25  * For more information see utf.h and the ICU User Guide Strings chapter
26  * (https://unicode-org.github.io/icu/userguide/strings).
27  *
28  * <em>Usage:</em>
29  * ICU coding guidelines for if() statements should be followed when using these macros.
30  * Compound statements (curly braces {}) must be used for if-else-while...
31  * bodies and all macro statements should be terminated with semicolon.
32  */
33 
34 #ifndef __UTF16_H__
35 #define __UTF16_H__
36 
37 #include "unicode/umachine.h"
38 #ifndef __UTF_H__
39 #   include "unicode/utf.h"
40 #endif
41 
42 /* single-code point definitions -------------------------------------------- */
43 
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * The expansion is fully parenthesized so that it behaves as a single
 * operand next to any operator at the call site.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
51 
52 /**
53  * Is this code unit a lead surrogate (U+d800..U+dbff)?
54  * @param c 16-bit code unit
55  * @return TRUE or FALSE
56  * @stable ICU 2.4
57  */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
59 
60 /**
61  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
62  * @param c 16-bit code unit
63  * @return TRUE or FALSE
64  * @stable ICU 2.4
65  */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
67 
68 /**
69  * Is this code unit a surrogate (U+d800..U+dfff)?
70  * @param c 16-bit code unit
71  * @return TRUE or FALSE
72  * @stable ICU 2.4
73  */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
75 
76 /**
77  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
78  * is it a lead surrogate?
79  * @param c 16-bit code unit
80  * @return TRUE or FALSE
81  * @stable ICU 2.4
82  */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
84 
85 /**
86  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
87  * is it a trail surrogate?
88  * @param c 16-bit code unit
89  * @return TRUE or FALSE
90  * @stable ICU 4.2
91  */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
93 
94 /**
95  * Helper constant for U16_GET_SUPPLEMENTARY.
96  * @internal
97  */
#define U16_SURROGATE_OFFSET ((0xd800<<10)+0xdc00-0x10000)
99 
100 /**
101  * Get a supplementary code point value (U+10000..U+10ffff)
102  * from its lead and trail surrogates.
103  * The result is undefined if the input values are not
104  * lead and trail surrogates.
105  *
106  * @param lead lead surrogate (U+d800..U+dbff)
107  * @param trail trail surrogate (U+dc00..U+dfff)
108  * @return supplementary code point (U+10000..U+10ffff)
109  * @stable ICU 2.4
110  */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((UChar32)(trail)+((UChar32)(lead)<<10)-U16_SURROGATE_OFFSET)
113 
114 
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is wrapped in parentheses so that the expansion
 * is a single primary expression (usable, for example, as a sizeof operand).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
123 
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is wrapped in parentheses so that the expansion
 * is a single primary expression (usable, for example, as a sizeof operand).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
132 
133 /**
134  * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
135  * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
136  * @param c 32-bit code point
137  * @return 1 or 2
138  * @stable ICU 2.4
139  */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
141 
/**
 * Upper bound on the number of 16-bit code units needed for one
 * Unicode code point (U+0000..U+10ffff); compare U16_LENGTH, which
 * yields 1 or 2 for a specific code point.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
148 
149 /**
150  * Get a code point from a string at a random-access offset,
151  * without changing the offset.
152  * "Unsafe" macro, assumes well-formed UTF-16.
153  *
154  * The offset may point to either the lead or trail surrogate unit
155  * for a supplementary code point, in which case the macro will read
156  * the adjacent matching surrogate as well.
157  * The result is undefined if the offset points to a single, unpaired surrogate.
158  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
159  *
160  * @param s const UChar * string
161  * @param i string offset
162  * @param c output UChar32 variable
163  * @see U16_GET
164  * @stable ICU 2.4
165  */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        /* well-formed input: combine with whichever neighbor completes the pair */ \
        (c)=U16_IS_SURROGATE_TRAIL(c) ? \
            U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)) : \
            U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
    } \
} UPRV_BLOCK_MACRO_END
176 
177 /**
178  * Get a code point from a string at a random-access offset,
179  * without changing the offset.
180  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
181  *
182  * The offset may point to either the lead or trail surrogate unit
183  * for a supplementary code point, in which case the macro will read
184  * the adjacent matching surrogate as well.
185  *
186  * The length can be negative for a NUL-terminated string.
187  *
188  * If the offset points to a single, unpaired surrogate, then
189  * c is set to that unpaired surrogate.
190  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
191  *
192  * @param s const UChar * string
193  * @param start starting string offset (usually 0)
194  * @param i string offset, must be start<=i<length
195  * @param length string length
196  * @param c output UChar32 variable
197  * @see U16_GET_UNSAFE
198  * @stable ICU 2.4
199  */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: pair it with a lead unit directly before it, if any */ \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } else if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead unit followed by a trail unit before the end of the string */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
        /* otherwise c keeps the unpaired surrogate */ \
    } \
} UPRV_BLOCK_MACRO_END
215 
216 /**
217  * Get a code point from a string at a random-access offset,
218  * without changing the offset.
219  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
220  *
221  * The offset may point to either the lead or trail surrogate unit
222  * for a supplementary code point, in which case the macro will read
223  * the adjacent matching surrogate as well.
224  *
225  * The length can be negative for a NUL-terminated string.
226  *
227  * If the offset points to a single, unpaired surrogate, then
228  * c is set to U+FFFD.
229  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
230  *
231  * @param s const UChar * string
232  * @param start starting string offset (usually 0)
233  * @param i string offset, must be start<=i<length
234  * @param length string length
235  * @param c output UChar32 variable
236  * @see U16_GET_UNSAFE
237  * @stable ICU 60
238  */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: needs a lead unit directly before it */ \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } else if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead unit followed by a trail unit before the end of the string */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
258 
259 /* definitions with forward iteration --------------------------------------- */
260 
261 /**
262  * Get a code point from a string at a code point boundary offset,
263  * and advance the offset to the next code point boundary.
264  * (Post-incrementing forward iteration.)
265  * "Unsafe" macro, assumes well-formed UTF-16.
266  *
267  * The offset may point to the lead surrogate unit
268  * for a supplementary code point, in which case the macro will read
269  * the following trail surrogate as well.
270  * If the offset points to a trail surrogate, then that itself
271  * will be returned as the code point.
272  * The result is undefined if the offset points to a single, unpaired lead surrogate.
273  *
274  * @param s const UChar * string
275  * @param i string offset
276  * @param c output UChar32 variable
277  * @see U16_NEXT
278  * @stable ICU 2.4
279  */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        /* well-formed input: the next unit is the matching trail surrogate */ \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[i]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
286 
287 /**
288  * Get a code point from a string at a code point boundary offset,
289  * and advance the offset to the next code point boundary.
290  * (Post-incrementing forward iteration.)
291  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
292  *
293  * The length can be negative for a NUL-terminated string.
294  *
295  * The offset may point to the lead surrogate unit
296  * for a supplementary code point, in which case the macro will read
297  * the following trail surrogate as well.
298  * If the offset points to a trail surrogate or
299  * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
300  *
301  * @param s const UChar * string
302  * @param i string offset, must be i<length
303  * @param length string length
304  * @param c output UChar32 variable
305  * @see U16_NEXT_UNSAFE
306  * @stable ICU 2.4
307  */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    ++(i); \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        /* peek at the following unit; consume it only if it is a trail surrogate */ \
        uint16_t __c2=(s)[(i)]; \
        if(U16_IS_TRAIL(__c2)) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
318 
319 /**
320  * Get a code point from a string at a code point boundary offset,
321  * and advance the offset to the next code point boundary.
322  * (Post-incrementing forward iteration.)
323  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
324  *
325  * The length can be negative for a NUL-terminated string.
326  *
327  * The offset may point to the lead surrogate unit
328  * for a supplementary code point, in which case the macro will read
329  * the following trail surrogate as well.
330  * If the offset points to a trail surrogate or
331  * to a single, unpaired lead surrogate, then c is set to U+FFFD.
332  *
333  * @param s const UChar * string
334  * @param i string offset, must be i<length
335  * @param length string length
336  * @param c output UChar32 variable
337  * @see U16_NEXT_UNSAFE
338  * @stable ICU 60
339  */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) || (i)==(length) || !U16_IS_TRAIL(__c2=(s)[(i)])) { \
            /* unpaired surrogate: trail first, lead at the end, or lead without trail */ \
            (c)=0xfffd; \
        } else { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
352 
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced past the written units (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to, so it must be modifiable)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary code point: lead surrogate first, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
374 
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced past the written units (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to TRUE and neither the buffer nor the offset changes.
 *
 * @param s UChar * string buffer (written to, so it must be modifiable)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
            /* supplementary code point and both surrogates fit */ \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* c>0x10ffff or not enough space */ { \
            (isError)=TRUE; \
        } \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
402 
403 /**
404  * Advance the string offset from one code point boundary to the next.
405  * (Post-incrementing iteration.)
406  * "Unsafe" macro, assumes well-formed UTF-16.
407  *
408  * @param s const UChar * string
409  * @param i string offset
410  * @see U16_FWD_1
411  * @stable ICU 2.4
412  */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
    /* the unit just passed was a lead surrogate: skip its trail as well */ \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
418 
419 /**
420  * Advance the string offset from one code point boundary to the next.
421  * (Post-incrementing iteration.)
422  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
423  *
424  * The length can be negative for a NUL-terminated string.
425  *
426  * @param s const UChar * string
427  * @param i string offset, must be i<length
428  * @param length string length
429  * @see U16_FWD_1_UNSAFE
430  * @stable ICU 2.4
431  */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        /* skip the following unit only if it exists and is a trail surrogate */ \
        if((i)!=(length) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
437 
438 /**
439  * Advance the string offset from one code point boundary to the n-th next one,
440  * i.e., move forward by n code points.
441  * (Post-incrementing iteration.)
442  * "Unsafe" macro, assumes well-formed UTF-16.
443  *
444  * @param s const UChar * string
445  * @param i string offset
446  * @param n number of code points to skip
447  * @see U16_FWD_N
448  * @stable ICU 2.4
449  */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* n is evaluated once; a count of zero or less leaves i untouched */ \
    for(__N=(n); __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
457 
458 /**
459  * Advance the string offset from one code point boundary to the n-th next one,
460  * i.e., move forward by n code points.
461  * (Post-incrementing iteration.)
462  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
463  *
464  * The length can be negative for a NUL-terminated string.
465  *
466  * @param s const UChar * string
467  * @param i int32_t string offset, must be i<length
468  * @param length int32_t string length
469  * @param n number of code points to skip
470  * @see U16_FWD_N_UNSAFE
471  * @stable ICU 2.4
472  */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* stop early at the end: i reaches length, or a NUL when length is negative */ \
    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
480 
481 /**
482  * Adjust a random-access offset to a code point boundary
483  * at the start of a code point.
484  * If the offset points to the trail surrogate of a surrogate pair,
485  * then the offset is decremented.
486  * Otherwise, it is not modified.
487  * "Unsafe" macro, assumes well-formed UTF-16.
488  *
489  * @param s const UChar * string
490  * @param i string offset
491  * @see U16_SET_CP_START
492  * @stable ICU 2.4
493  */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* on a trail surrogate: step back onto the lead that well-formed text guarantees */ \
    if(U16_IS_TRAIL((s)[i])) { \
        (i)-=1; \
    } \
} UPRV_BLOCK_MACRO_END
499 
500 /**
501  * Adjust a random-access offset to a code point boundary
502  * at the start of a code point.
503  * If the offset points to the trail surrogate of a surrogate pair,
504  * then the offset is decremented.
505  * Otherwise, it is not modified.
506  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
507  *
508  * @param s const UChar * string
509  * @param start starting string offset (usually 0)
510  * @param i string offset, must be start<=i
511  * @see U16_SET_CP_START_UNSAFE
512  * @stable ICU 2.4
513  */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        /* step back only when a lead surrogate precedes inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
519 
520 /* definitions with backward iteration -------------------------------------- */
521 
522 /**
523  * Move the string offset from one code point boundary to the previous one
524  * and get the code point between them.
525  * (Pre-decrementing backward iteration.)
526  * "Unsafe" macro, assumes well-formed UTF-16.
527  *
528  * The input offset may be the same as the string length.
529  * If the offset is behind a trail surrogate unit
530  * for a supplementary code point, then the macro will read
531  * the preceding lead surrogate as well.
532  * If the offset is behind a lead surrogate, then that itself
533  * will be returned as the code point.
534  * The result is undefined if the offset is behind a single, unpaired trail surrogate.
535  *
536  * @param s const UChar * string
537  * @param i string offset
538  * @param c output UChar32 variable
539  * @see U16_PREV
540  * @stable ICU 2.4
541  */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c)) { \
        /* well-formed input: the unit before a trail is its lead surrogate */ \
        --(i); \
        (c)=U16_GET_SUPPLEMENTARY((s)[i], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
548 
549 /**
550  * Move the string offset from one code point boundary to the previous one
551  * and get the code point between them.
552  * (Pre-decrementing backward iteration.)
553  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
554  *
555  * The input offset may be the same as the string length.
556  * If the offset is behind a trail surrogate unit
557  * for a supplementary code point, then the macro will read
558  * the preceding lead surrogate as well.
559  * If the offset is behind a lead surrogate or behind a single, unpaired
560  * trail surrogate, then c is set to that unpaired surrogate.
561  *
562  * @param s const UChar * string
563  * @param start starting string offset (usually 0)
564  * @param i string offset, must be start<i
565  * @param c output UChar32 variable
566  * @see U16_PREV_UNSAFE
567  * @stable ICU 2.4
568  */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        /* peek at the preceding unit; consume it only if it is a lead surrogate */ \
        uint16_t __c2=(s)[(i)-1]; \
        if(U16_IS_LEAD(__c2)) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
579 
580 /**
581  * Move the string offset from one code point boundary to the previous one
582  * and get the code point between them.
583  * (Pre-decrementing backward iteration.)
584  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
585  *
586  * The input offset may be the same as the string length.
587  * If the offset is behind a trail surrogate unit
588  * for a supplementary code point, then the macro will read
589  * the preceding lead surrogate as well.
590  * If the offset is behind a lead surrogate or behind a single, unpaired
591  * trail surrogate, then c is set to U+FFFD.
592  *
593  * @param s const UChar * string
594  * @param start starting string offset (usually 0)
595  * @param i string offset, must be start<i
596  * @param c output UChar32 variable
597  * @see U16_PREV_UNSAFE
598  * @stable ICU 60
599  */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) || (i)<=(start) || !U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            /* unpaired surrogate: lead last, trail at the start, or trail without lead */ \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
612 
613 /**
614  * Move the string offset from one code point boundary to the previous one.
615  * (Pre-decrementing backward iteration.)
616  * The input offset may be the same as the string length.
617  * "Unsafe" macro, assumes well-formed UTF-16.
618  *
619  * @param s const UChar * string
620  * @param i string offset
621  * @see U16_BACK_1
622  * @stable ICU 2.4
623  */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    /* landed on a trail surrogate: move on to its lead */ \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
629 
630 /**
631  * Move the string offset from one code point boundary to the previous one.
632  * (Pre-decrementing backward iteration.)
633  * The input offset may be the same as the string length.
634  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
635  *
636  * @param s const UChar * string
637  * @param start starting string offset (usually 0)
638  * @param i string offset, must be start<i
639  * @see U16_BACK_1_UNSAFE
640  * @stable ICU 2.4
641  */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        /* include the preceding unit only if it is a lead surrogate inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
647 
648 /**
649  * Move the string offset from one code point boundary to the n-th one before it,
650  * i.e., move backward by n code points.
651  * (Pre-decrementing backward iteration.)
652  * The input offset may be the same as the string length.
653  * "Unsafe" macro, assumes well-formed UTF-16.
654  *
655  * @param s const UChar * string
656  * @param i string offset
657  * @param n number of code points to skip
658  * @see U16_BACK_N
659  * @stable ICU 2.4
660  */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* n is evaluated once; a count of zero or less leaves i untouched */ \
    for(__N=(n); __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
668 
669 /**
670  * Move the string offset from one code point boundary to the n-th one before it,
671  * i.e., move backward by n code points.
672  * (Pre-decrementing backward iteration.)
673  * The input offset may be the same as the string length.
674  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
675  *
676  * @param s const UChar * string
677  * @param start start of string
678  * @param i string offset, must be start<i
679  * @param n number of code points to skip
680  * @see U16_BACK_N_UNSAFE
681  * @stable ICU 2.4
682  */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* stop early once the offset has reached the start of the string */ \
    for(__N=(n); __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
690 
691 /**
692  * Adjust a random-access offset to a code point boundary after a code point.
693  * If the offset is behind the lead surrogate of a surrogate pair,
694  * then the offset is incremented.
695  * Otherwise, it is not modified.
696  * The input offset may be the same as the string length.
697  * "Unsafe" macro, assumes well-formed UTF-16.
698  *
699  * @param s const UChar * string
700  * @param i string offset
701  * @see U16_SET_CP_LIMIT
702  * @stable ICU 2.4
703  */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* directly behind a lead surrogate: move past the trail that well-formed text guarantees */ \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        (i)+=1; \
    } \
} UPRV_BLOCK_MACRO_END
709 
710 /**
711  * Adjust a random-access offset to a code point boundary after a code point.
712  * If the offset is behind the lead surrogate of a surrogate pair,
713  * then the offset is incremented.
714  * Otherwise, it is not modified.
715  * The input offset may be the same as the string length.
716  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
717  *
718  * The length can be negative for a NUL-terminated string.
719  *
720  * @param s const UChar * string
721  * @param start int32_t starting string offset (usually 0)
722  * @param i int32_t string offset, start<=i<=length
723  * @param length int32_t string length
724  * @see U16_SET_CP_LIMIT_UNSAFE
725  * @stable ICU 2.4
726  */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* only an offset strictly inside the string can split a surrogate pair */ \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
732 
733 #endif
734