• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)
11 
12 mod tables;
13 
14 pub use self::tables::{BidiClass, UNICODE_VERSION};
15 #[cfg(feature = "hardcoded-data")]
16 use core::char;
17 #[cfg(feature = "hardcoded-data")]
18 use core::cmp::Ordering::{Equal, Greater, Less};
19 
20 #[cfg(feature = "hardcoded-data")]
21 use self::tables::bidi_class_table;
22 use crate::data_source::BidiMatchedOpeningBracket;
23 use crate::BidiClass::*;
24 #[cfg(feature = "hardcoded-data")]
25 use crate::BidiDataSource;
/// Hardcoded Bidi data that ships with the unicode-bidi crate.
///
/// This is a field-less marker type: it carries no state of its own and
/// answers `BidiDataSource` queries from the tables compiled into this
/// module. The common value-type traits are derived so it can be freely
/// copied, default-constructed and printed in debug output.
///
/// This can be enabled with the default `hardcoded-data` Cargo feature.
#[cfg(feature = "hardcoded-data")]
#[derive(Debug, Clone, Copy, Default)]
pub struct HardcodedBidiData;
31 
#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Looks up the `Bidi_Class` of `c` in the compiled-in `bidi_class_table`.
    fn bidi_class(&self, c: char) -> BidiClass {
        let table = bidi_class_table;
        bsearch_range_value_table(c, table)
    }
}
38 
/// Returns the `BidiClass` of a single char, using the hardcoded table.
///
/// Code points that fall outside every range of `bidi_class_table` are
/// reported as `L` by the underlying range search.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}
44 
45 /// If this character is a bracket according to BidiBrackets.txt,
46 /// return the corresponding *normalized* *opening bracket* of the pair,
47 /// and whether or not it itself is an opening bracket.
bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket>48 pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> {
49     for pair in self::tables::bidi_pairs_table {
50         if pair.0 == c || pair.1 == c {
51             let skeleton = pair.2.unwrap_or(pair.0);
52             return Some(BidiMatchedOpeningBracket {
53                 opening: skeleton,
54                 is_open: pair.0 == c,
55             });
56         }
57     }
58     None
59 }
60 
is_rtl(bidi_class: BidiClass) -> bool61 pub fn is_rtl(bidi_class: BidiClass) -> bool {
62     match bidi_class {
63         RLE | RLO | RLI => true,
64         _ => false,
65     }
66 }
67 
/// Finds the `BidiClass` assigned to `c` in the range table `r`.
///
/// Each entry of `r` is an inclusive `(lo, hi, class)` range. The table is
/// probed with a binary search, so it is expected to be sorted by range and
/// free of overlaps. When no range contains `c`, `L` is returned.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    let position = r.binary_search_by(|&(lo, hi, _)| {
        if hi < c {
            // The whole range lies before `c`.
            Less
        } else if c < lo {
            // The whole range lies after `c`.
            Greater
        } else {
            Equal
        }
    });

    match position {
        Ok(idx) => r[idx].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
88 
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    /// Asserts that `bidi_class` returns the expected class for every listed
    /// code point, naming the offending code point when a check fails.
    fn assert_classes(expected: &[(char, BidiClass)]) {
        for &(c, class) in expected {
            assert_eq!(bidi_class(c), class, "U+{:04X}", u32::from(c));
        }
    }

    #[test]
    fn test_ascii() {
        assert_classes(&[
            ('\u{0000}', BN),
            ('\u{0040}', ON),
            ('\u{0041}', L),
            ('\u{0062}', L),
            ('\u{007F}', BN),
        ]);
    }

    #[test]
    fn test_bmp() {
        // Hebrew
        assert_classes(&[
            ('\u{0590}', R),
            ('\u{05D0}', R),
            ('\u{05D1}', R),
            ('\u{05FF}', R),
        ]);

        // Arabic
        assert_classes(&[
            ('\u{0600}', AN),
            ('\u{0627}', AL),
            ('\u{07BF}', AL),
        ]);

        // Default R + Arabic Extras
        assert_classes(&[
            ('\u{07C0}', R),
            ('\u{085F}', R),
            ('\u{0860}', AL),
            ('\u{0870}', AL),
            ('\u{089F}', NSM),
            ('\u{08A0}', AL),
            ('\u{08FF}', NSM),
        ]);

        // Default ET
        assert_classes(&[
            ('\u{20A0}', ET),
            ('\u{20CF}', ET),
        ]);

        // Arabic Presentation Forms
        assert_classes(&[
            ('\u{FB1D}', R),
            ('\u{FB4F}', R),
            ('\u{FB50}', AL),
            ('\u{FDCF}', ON),
            ('\u{FDF0}', AL),
            ('\u{FDFF}', ON),
            ('\u{FE70}', AL),
            ('\u{FEFE}', AL),
            ('\u{FEFF}', BN),
        ]);

        // noncharacters
        assert_classes(&[
            ('\u{FDD0}', L),
            ('\u{FDD1}', L),
            ('\u{FDEE}', L),
            ('\u{FDEF}', L),
            ('\u{FFFE}', L),
            ('\u{FFFF}', L),
        ]);
    }

    #[test]
    fn test_smp() {
        // Default AL + R
        assert_classes(&[
            ('\u{10800}', R),
            ('\u{10FFF}', R),
            ('\u{1E800}', R),
            ('\u{1EDFF}', R),
            ('\u{1EE00}', AL),
            ('\u{1EEFF}', AL),
            ('\u{1EF00}', R),
            ('\u{1EFFF}', R),
        ]);
    }

    #[test]
    fn test_unassigned_planes() {
        // First code point of each of planes 3 through 10.
        assert_classes(&[
            ('\u{30000}', L),
            ('\u{40000}', L),
            ('\u{50000}', L),
            ('\u{60000}', L),
            ('\u{70000}', L),
            ('\u{80000}', L),
            ('\u{90000}', L),
            ('\u{a0000}', L),
        ]);
    }
}
174