1 use std::collections::BTreeSet as Set;
2 use std::fs;
3 use std::io::{self, Write};
4 use std::path::Path;
5 use std::process;
6
/// Code point sets for the `XID_Start` and `XID_Continue` properties.
///
/// Each set holds code points as `u32` values; membership is queried through
/// the `is_xid_start` / `is_xid_continue` methods.
#[derive(Debug, Clone, Default)]
pub struct Properties {
    /// Code points listed with the `XID_Start` property.
    xid_start: Set<u32>,
    /// Code points listed with the `XID_Continue` property.
    xid_continue: Set<u32>,
}
11
12 impl Properties {
is_xid_start(&self, ch: char) -> bool13 pub fn is_xid_start(&self, ch: char) -> bool {
14 self.xid_start.contains(&(ch as u32))
15 }
16
is_xid_continue(&self, ch: char) -> bool17 pub fn is_xid_continue(&self, ch: char) -> bool {
18 self.xid_continue.contains(&(ch as u32))
19 }
20 }
21
parse_xid_properties(ucd_dir: &Path) -> Properties22 pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
23 let mut properties = Properties {
24 xid_start: Set::new(),
25 xid_continue: Set::new(),
26 };
27
28 let filename = "DerivedCoreProperties.txt";
29 let path = ucd_dir.join(filename);
30 let contents = fs::read_to_string(path).unwrap_or_else(|err| {
31 let suggestion =
32 "Download from https://www.unicode.org/Public/zipped/l5.0.0/UCD.zip and unzip.";
33 let _ = writeln!(io::stderr(), "{}: {err}\n{suggestion}", ucd_dir.display());
34 process::exit(1);
35 });
36
37 for (i, line) in contents.lines().enumerate() {
38 if line.starts_with('#') || line.trim().is_empty() {
39 continue;
40 }
41 let (lo, hi, name) = parse_line(line).unwrap_or_else(|| {
42 let _ = writeln!(io::stderr(), "{filename} line {i} is unexpected:\n{line}");
43 process::exit(1);
44 });
45 let set = match name {
46 "XID_Start" => &mut properties.xid_start,
47 "XID_Continue" => &mut properties.xid_continue,
48 _ => continue,
49 };
50 set.extend(lo..=hi);
51 }
52
53 properties
54 }
55
parse_line(line: &str) -> Option<(u32, u32, &str)>56 fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
57 let (mut codepoint, rest) = line.split_once(';')?;
58
59 let (lo, hi);
60 codepoint = codepoint.trim();
61 if let Some((a, b)) = codepoint.split_once("..") {
62 lo = parse_codepoint(a)?;
63 hi = parse_codepoint(b)?;
64 } else {
65 lo = parse_codepoint(codepoint)?;
66 hi = lo;
67 }
68
69 let name = rest.trim().split('#').next()?.trim_end();
70 Some((lo, hi, name))
71 }
72
/// Parses `s` as a hexadecimal `u32`, returning `None` if it is not valid.
fn parse_codepoint(s: &str) -> Option<u32> {
    match u32::from_str_radix(s, 16) {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
76