• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <wchar.h>
30 
31 #include "private/icu4x.h"
32 
wcwidth(wchar_t wc)33 int wcwidth(wchar_t wc) {
34   // Fast-path ASCII.
35   if (wc >= 0x20 && wc < 0x7f) return 1;
36 
37   // ASCII NUL is a special case.
38   if (wc == 0) return 0;
39 
40   // C0.
41   if (wc < ' ' || (wc >= 0x7f && wc <= 0xa0)) return -1;
42 
43   // Now for the i18n part. This isn't defined or standardized, so a lot of the choices are
44   // pretty arbitrary. See https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for more details.
45 
46   // Fancy unicode control characters?
47   switch (__icu4x_bionic_general_category(wc)) {
48     case U_CONTROL_CHAR:
49       return -1;
50     case U_NON_SPACING_MARK:
51     case U_ENCLOSING_MARK:
52       return 0;
53     case U_FORMAT_CHAR:
54       // A special case for soft hyphen (U+00AD) to match historical practice.
55       // See the tests for more commentary.
56       return (wc == 0x00ad) ? 1 : 0;
57   }
58 
59   // Medial and final jamo render as zero width when used correctly,
60   // so we handle them specially rather than relying on East Asian Width.
61   switch (__icu4x_bionic_hangul_syllable_type(wc)) {
62     case U_HST_VOWEL_JAMO:
63     case U_HST_TRAILING_JAMO:
64       return 0;
65     case U_HST_LEADING_JAMO:
66     case U_HST_LV_SYLLABLE:
67     case U_HST_LVT_SYLLABLE:
68       return 2;
69   }
70 
71   // Hangeul choseong filler U+115F is default ignorable, so we check default
72   // ignorability only after we've already handled Hangeul jamo above.
73   if (__icu4x_bionic_is_default_ignorable_code_point(wc)) return 0;
74 
75   // A few weird special cases where EastAsianWidth is not helpful for us.
76   if (wc >= 0x3248 && wc <= 0x4dff) {
77     // Circled two-digit CJK "speed sign" numbers. EastAsianWidth is ambiguous,
78     // but wide makes more sense.
79     if (wc <= 0x324f) return 2;
80     // Hexagrams. EastAsianWidth is neutral, but wide seems better.
81     if (wc >= 0x4dc0) return 2;
82   }
83 
84   // The EastAsianWidth property is at least defined by the Unicode standard!
85   // https://www.unicode.org/reports/tr11/
86   switch (__icu4x_bionic_east_asian_width(wc)) {
87     case U_EA_AMBIGUOUS:
88     case U_EA_HALFWIDTH:
89     case U_EA_NARROW:
90     case U_EA_NEUTRAL:
91       return 1;
92     case U_EA_FULLWIDTH:
93     case U_EA_WIDE:
94       return 2;
95   }
96 
97   return 0;
98 }
99