• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/// Compute the display width of `text`, in terminal columns.
///
/// ANSI escape sequences are skipped: everything from an ESC character
/// (U+001B) up to and including the next `m` contributes nothing to the
/// result. Any other ASCII control character (tab, newline, …) is counted as
/// zero columns and does not affect how the text after it is measured.
///
/// **Note:** When the `unicode` Cargo feature is disabled, all visible characters are presumed to
/// take up 1 width.  With the feature enabled, function will correctly deal with
/// [combining characters] in their decomposed form (see [Unicode equivalence]).
///
/// An example of a decomposed character is “é”, which can be decomposed into: “e” followed by a
/// combining acute accent: “◌́”.  Without the `unicode` Cargo feature, every `char` has a width of
/// 1. This includes the combining accent, so the decomposed form is measured as 2 columns.
///
/// ## Emojis and CJK Characters
///
/// Characters such as emojis and [CJK characters] used in the
/// Chinese, Japanese, and Korean languages are seen as double-width
/// only when the `unicode` feature is enabled. Without it they are
/// counted as a single column, like every other character.
///
/// # Limitations
///
/// The displayed width of a string cannot always be computed from the
/// string alone. This is because the width depends on the rendering
/// engine used. This is particularly visible with [emoji modifier
/// sequences] where a base emoji is modified with, e.g., skin tone or
/// hair color modifiers. It is up to the rendering engine to detect
/// this and to produce a suitable emoji.
///
/// A simple example is “❤️”, which consists of “❤” (U+2764: Black
/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
/// itself, “❤” is a black heart, but if you follow it with the
/// variant selector, you may get a wider red heart.
///
/// A more complex example would be “👨‍🦰” which should depict a man
/// with red hair. Here the computed width is too large — and the
/// width differs depending on the use of the `unicode` feature.
///
/// This happens because the grapheme consists of three code points:
/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
/// (U+1F9B0: Red Hair). With `unicode` enabled, the ZWJ is correctly
/// seen as having zero width and the two emojis are each counted as
/// double-width; without it, each of the three code points is counted
/// as a single-width character.
///
/// ## Terminal Support
///
/// Modern browsers typically do a great job at combining characters
/// as shown above, but terminals often struggle more. As an example,
/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
/// shows "👨‍🦰" as “👨🦰”.
///
/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
#[inline(never)]
pub(crate) fn display_width(text: &str) -> usize {
    // Only ESC opens an escape sequence. Treating every control character as
    // an opener would make a stray tab or newline hide all following text up
    // to the next `m`.
    const ESCAPE: char = '\u{1b}';
    // Final byte of the SGR (colour/style) sequences this function skips.
    const SEQUENCE_END: char = 'm';

    let mut width = 0;
    let mut in_escape_sequence = false;

    for ch in text.chars() {
        if in_escape_sequence {
            // Nothing inside the sequence is visible, including the
            // terminating `m` itself.
            in_escape_sequence = ch != SEQUENCE_END;
        } else if ch == ESCAPE {
            in_escape_sequence = true;
        } else if !ch.is_ascii_control() {
            width += ch_width(ch);
        }
        // Remaining case: a control character outside a sequence, which
        // occupies no columns regardless of the `unicode` feature.
    }
    width
}

/// Width of a single `char` as reported by the `unicode-width` crate.
///
/// Characters for which the crate reports no width are counted as 0.
#[cfg(feature = "unicode")]
fn ch_width(ch: char) -> usize {
    unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
}

/// Fallback used without the `unicode` feature: every `char` is 1 column wide.
#[cfg(not(feature = "unicode"))]
fn ch_width(_: char) -> usize {
    1
}
85 
/// Unit tests for `display_width` and `ch_width`, covering both the
/// `unicode` and the fallback configuration.
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "unicode")]
    use unicode_width::UnicodeWidthChar;

    #[test]
    fn emojis_have_correct_width() {
        use unic_emoji_char::is_emoji;

        // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
        // blocks all have a width of 1 column. This includes
        // characters such as '#' and '©'.
        for ch in '\u{1}'..'\u{FF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode")]
                assert_eq!(ch.width().unwrap(), 1, "char: {desc}");

                #[cfg(not(feature = "unicode"))]
                assert_eq!(ch_width(ch), 1, "char: {desc}");
            }
        }

        // Emojis in the remaining blocks of the Basic Multilingual
        // Plane (BMP), in the Supplementary Multilingual Plane (SMP),
        // and in the Supplementary Ideographic Plane (SIP), are all 1
        // or 2 columns wide when the `unicode` feature is enabled, and
        // always 1 column wide otherwise (the fallback `ch_width`
        // returns 1 for every char). This includes all of our favorite
        // emojis such as 😊.
        for ch in '\u{FF}'..'\u{2FFFF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode")]
                assert!(ch.width().unwrap() <= 2, "char: {desc}");

                #[cfg(not(feature = "unicode"))]
                assert_eq!(ch_width(ch), 1, "char: {desc}");
            }
        }

        // The remaining planes contain almost no assigned code points
        // and thus also no emojis.
    }

    #[test]
    #[cfg(feature = "unicode")]
    fn display_width_works() {
        assert_eq!("Café Plain".len(), 11); // “é” is two bytes
        assert_eq!(display_width("Café Plain"), 10);
    }

    #[test]
    #[cfg(feature = "unicode")]
    fn display_width_narrow_emojis() {
        assert_eq!(display_width("⁉"), 1);
    }

    #[test]
    #[cfg(feature = "unicode")]
    fn display_width_narrow_emojis_variant_selector() {
        assert_eq!(display_width("⁉\u{fe0f}"), 1);
    }

    #[test]
    #[cfg(feature = "unicode")]
    fn display_width_emojis() {
        // Ten double-width emojis.
        assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
    }
}
159