• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10 
11 #[cfg(feature = "bench")]
12 use std::iter;
13 #[cfg(feature = "bench")]
14 use test::{self, Bencher};
15 #[cfg(feature = "bench")]
16 use super::{UnicodeWidthChar, UnicodeWidthStr};
17 
18 use std::prelude::v1::*;
19 
/// Benchmarks `UnicodeWidthChar::width` invoked once per character over a
/// 4096-character string made entirely of ASCII `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn cargo(b: &mut Bencher) {
    let ascii_text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii_text.chars() {
            // `black_box` keeps the optimizer from discarding the unused result.
            test::black_box(UnicodeWidthChar::width(ch));
        }
    });
}
31 
/// Benchmarks the method-call form `c.width()` once per character over a
/// 4096-character string made entirely of ASCII `'a'`.
///
/// NOTE(review): the name and `#[allow(deprecated)]` suggest this was meant to
/// measure a standard-library `char::width`; confirm which implementation the
/// call resolves to in the current build.
#[cfg(feature = "bench")]
#[bench]
#[allow(deprecated)]
fn stdlib(b: &mut Bencher) {
    let ascii_text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii_text.chars() {
            // `black_box` keeps the optimizer from discarding the unused result.
            test::black_box(ch.width());
        }
    });
}
44 
/// Benchmarks `simple_width_if` invoked once per character over a
/// 4096-character string made entirely of ASCII `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn simple_if(b: &mut Bencher) {
    let ascii_text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii_text.chars() {
            // `black_box` keeps the optimizer from discarding the unused result.
            test::black_box(simple_width_if(ch));
        }
    });
}
56 
/// Benchmarks `simple_width_match` invoked once per character over a
/// 4096-character string made entirely of ASCII `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn simple_match(b: &mut Bencher) {
    let ascii_text: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii_text.chars() {
            // `black_box` keeps the optimizer from discarding the unused result.
            test::black_box(simple_width_match(ch));
        }
    });
}
68 
/// Width lookup with an `if`-based fast path for code points below 127.
///
/// Returns `Some(0)` for NUL, `None` for code points 1 through 31, and
/// `Some(1)` for code points 32 through 126. Everything from 127 upward is
/// delegated to `UnicodeWidthChar::width`.
#[cfg(feature = "bench")]
#[inline]
fn simple_width_if(c: char) -> Option<usize> {
    let code = c as u32;

    // Outside the fast-path range: defer to the general lookup.
    if code >= 127 {
        return UnicodeWidthChar::width(c);
    }

    if code == 0 {
        Some(0)
    } else if code <= 31 {
        None
    } else {
        Some(1)
    }
}
85 
/// Width lookup with a `match`-based fast path for code points below 0x7f.
///
/// Returns `Some(0)` for NUL, `None` for code points 0x01 through 0x1f, and
/// `Some(1)` for code points 0x20 through 0x7e. Everything else is delegated
/// to `UnicodeWidthChar::width`.
#[cfg(feature = "bench")]
#[inline]
fn simple_width_match(c: char) -> Option<usize> {
    match c as u32 {
        0 => Some(0),
        0x01..=0x1f => None,
        0x20..=0x7e => Some(1),
        _ => UnicodeWidthChar::width(c),
    }
}
/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/enwik8`.
#[cfg(all(feature = "bench", not(feature = "no_std")))]
#[bench]
fn enwik8(b: &mut Bencher) {
    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
    // If the file cannot be read, the benchmark runs on an empty string
    // instead of panicking.
    let corpus = std::fs::read_to_string("bench_data/enwik8").unwrap_or_default();

    b.iter(|| {
        let total_width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(total_width)
    });
}
/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/jawiki-20220501-pages-articles-multistream-index.txt`.
#[cfg(all(feature = "bench", not(feature = "no_std")))]
#[bench]
fn jawiki(b: &mut Bencher) {
    // To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
    // https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
    // If the file cannot be read, the benchmark runs on an empty string
    // instead of panicking.
    let corpus =
        std::fs::read_to_string("bench_data/jawiki-20220501-pages-articles-multistream-index.txt")
            .unwrap_or_default();

    b.iter(|| {
        let total_width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(total_width)
    });
}
/// Checks string widths in both the default and CJK modes for fullwidth
/// letters, control characters, the empty string, and subscript digits.
#[test]
fn test_str() {
    use super::UnicodeWidthStr;

    // Fullwidth Latin "hello" (U+FF48 U+FF45 U+FF4C U+FF4C U+FF4F). Written as
    // escapes so the literal cannot be silently folded to ASCII, which would
    // make the expected width 5 instead of 10.
    let fullwidth_hello = "\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}";
    assert_eq!(UnicodeWidthStr::width(fullwidth_hello), 10);
    assert_eq!(fullwidth_hello.width_cjk(), 10);

    // Control characters contribute nothing to a string's width.
    assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
    assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);

    assert_eq!(UnicodeWidthStr::width(""), 0);
    assert_eq!("".width_cjk(), 0);

    // U+2081..U+2084 are one column each by default and two in CJK mode.
    assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
}
126 
/// Checks the emoji widths shown in the README example.
///
/// The emoji are written as `\u{...}` escapes so an encoding round-trip cannot
/// replace them with U+FFFD and leave the test exercising the wrong characters.
#[test]
fn test_emoji() {
    // Example from the README.
    use super::UnicodeWidthStr;

    assert_eq!(UnicodeWidthStr::width("\u{1F469}"), 2); // Woman
    assert_eq!(UnicodeWidthStr::width("\u{1F52C}"), 2); // Microscope
    // Woman + ZERO WIDTH JOINER + Microscope
    assert_eq!(UnicodeWidthStr::width("\u{1F469}\u{200D}\u{1F52C}"), 4); // Woman scientist
}
136 
/// Checks single-character widths in both the default and CJK modes for a
/// fullwidth letter, NUL, a control character, and a subscript digit.
#[test]
fn test_char() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H. Written as an escape so it cannot
    // be folded to ASCII 'h', whose width is 1 rather than 2.
    assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
    assert_eq!('\u{ff48}'.width_cjk(), Some(2));

    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));

    // Control characters have no defined width.
    assert_eq!(UnicodeWidthChar::width('\x01'), None);
    assert_eq!('\x01'.width_cjk(), None);

    // U+2081 is one column by default and two in CJK mode.
    assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
    assert_eq!('\u{2081}'.width_cjk(), Some(2));
}
152 
/// Checks single-character widths in both the default and CJK modes across a
/// wider set of code points: NUL, line feed, an ASCII letter, a fullwidth
/// letter, U+00AD, U+1160, U+00A1, and U+0300.
#[test]
fn test_char2() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));

    assert_eq!(UnicodeWidthChar::width('\x0A'), None);
    assert_eq!('\x0A'.width_cjk(), None);

    // Plain ASCII letter: one column in both modes.
    assert_eq!(UnicodeWidthChar::width('w'), Some(1));
    assert_eq!('w'.width_cjk(), Some(1));

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H. Written as an escape so it cannot
    // be folded to ASCII 'h', which would contradict the ASCII case above.
    assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
    assert_eq!('\u{ff48}'.width_cjk(), Some(2));

    assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(1));
    assert_eq!('\u{AD}'.width_cjk(), Some(1));

    assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0));
    assert_eq!('\u{1160}'.width_cjk(), Some(0));

    // U+00A1 is one column by default and two in CJK mode.
    assert_eq!(UnicodeWidthChar::width('\u{a1}'), Some(1));
    assert_eq!('\u{a1}'.width_cjk(), Some(2));

    assert_eq!(UnicodeWidthChar::width('\u{300}'), Some(0));
    assert_eq!('\u{300}'.width_cjk(), Some(0));
}
183 
/// Checks that U+1F971 is reported as two columns wide.
#[test]
fn unicode_12() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    let sample = '\u{1F971}';
    let measured = UnicodeWidthChar::width(sample);
    assert_eq!(measured, Some(2));
}
192