• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::BTreeSet as Set;
2 use std::fs;
3 use std::io::{self, Write};
4 use std::path::Path;
5 use std::process;
6 
/// Code point sets for the two identifier properties this file extracts.
///
/// Each set stores individual code points as `u32` scalar values.
#[derive(Debug, Clone, Default)]
pub struct Properties {
    /// Code points recorded under the `XID_Start` property.
    xid_start: Set<u32>,
    /// Code points recorded under the `XID_Continue` property.
    xid_continue: Set<u32>,
}

impl Properties {
    /// Returns `true` if `ch` is in the recorded `XID_Start` set.
    pub fn is_xid_start(&self, ch: char) -> bool {
        // `u32::from` is the lossless char -> scalar value conversion.
        self.xid_start.contains(&u32::from(ch))
    }

    /// Returns `true` if `ch` is in the recorded `XID_Continue` set.
    pub fn is_xid_continue(&self, ch: char) -> bool {
        self.xid_continue.contains(&u32::from(ch))
    }
}
21 
parse_xid_properties(ucd_dir: &Path) -> Properties22 pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
23     let mut properties = Properties {
24         xid_start: Set::new(),
25         xid_continue: Set::new(),
26     };
27 
28     let filename = "DerivedCoreProperties.txt";
29     let path = ucd_dir.join(filename);
30     let contents = fs::read_to_string(path).unwrap_or_else(|err| {
31         let suggestion =
32             "Download from https://www.unicode.org/Public/zipped/l5.0.0/UCD.zip and unzip.";
33         let _ = writeln!(io::stderr(), "{}: {err}\n{suggestion}", ucd_dir.display());
34         process::exit(1);
35     });
36 
37     for (i, line) in contents.lines().enumerate() {
38         if line.starts_with('#') || line.trim().is_empty() {
39             continue;
40         }
41         let (lo, hi, name) = parse_line(line).unwrap_or_else(|| {
42             let _ = writeln!(io::stderr(), "{filename} line {i} is unexpected:\n{line}");
43             process::exit(1);
44         });
45         let set = match name {
46             "XID_Start" => &mut properties.xid_start,
47             "XID_Continue" => &mut properties.xid_continue,
48             _ => continue,
49         };
50         set.extend(lo..=hi);
51     }
52 
53     properties
54 }
55 
parse_line(line: &str) -> Option<(u32, u32, &str)>56 fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
57     let (mut codepoint, rest) = line.split_once(';')?;
58 
59     let (lo, hi);
60     codepoint = codepoint.trim();
61     if let Some((a, b)) = codepoint.split_once("..") {
62         lo = parse_codepoint(a)?;
63         hi = parse_codepoint(b)?;
64     } else {
65         lo = parse_codepoint(codepoint)?;
66         hi = lo;
67     }
68 
69     let name = rest.trim().split('#').next()?.trim_end();
70     Some((lo, hi, name))
71 }
72 
/// Parses a code point written as bare hexadecimal digits (e.g. `00AA`).
///
/// Returns `None` for an empty string, for any character that is not an ASCII
/// hex digit, and for values above `char::MAX` (U+10FFFF).
fn parse_codepoint(s: &str) -> Option<u32> {
    // `from_str_radix` alone would accept a leading `+`, which is not a valid
    // code point field, so vet the characters first.
    if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(s, 16)
        .ok()
        .filter(|&cp| cp <= u32::from(char::MAX))
}
76