• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021, Alex Huszagh. Unlicensed.
2 // See https://unlicense.org/
3 
4 use std::io::prelude::*;
5 use std::path::PathBuf;
6 use std::{env, fs, io};
7 
8 // HELPERS
9 // -------
10 
// These functions are simple, reusable components.
12 
/// Split an optional leading sign off `bytes`.
///
/// Returns `(is_positive, rest)`. A leading `+` yields `true` and a leading
/// `-` yields `false`, with `rest` starting right after the sign byte. Any
/// other input, including an empty slice, is returned untouched and treated
/// as positive.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    if let Some((&lead, tail)) = bytes.split_first() {
        if lead == b'+' {
            return (true, tail);
        }
        if lead == b'-' {
            return (false, tail);
        }
    }
    // No sign byte present: nothing is consumed.
    (true, bytes)
}
22 
/// Interpret a byte as an ASCII decimal digit.
///
/// Returns `Some(0..=9)` for `b'0'..=b'9'` and `None` for every other byte.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
28 
/// Append a decimal digit to a non-negative exponent accumulator.
///
/// Computes `value * 10 + digit` with overflow checking.
///
/// * `value` - Exponent accumulated so far.
/// * `digit` - Next digit to append.
///
/// Returns `None` if the result does not fit in an `i32`, or if `digit`
/// itself cannot be represented as an `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    // A plain `as` cast would wrap large `u32` values into negative numbers,
    // silently turning the addition into a subtraction.
    let digit = <i32 as std::convert::TryFrom<u32>>::try_from(digit).ok()?;
    value.checked_mul(10)?.checked_add(digit)
}
34 
/// Append a decimal digit to a non-positive exponent accumulator.
///
/// Computes `value * 10 - digit` with overflow checking.
///
/// * `value` - Exponent accumulated so far.
/// * `digit` - Next digit to append.
///
/// Returns `None` if the result does not fit in an `i32`, or if `digit`
/// itself cannot be represented as an `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    // A plain `as` cast would wrap large `u32` values into negative numbers,
    // silently turning the subtraction into an addition.
    let digit = <i32 as std::convert::TryFrom<u32>>::try_from(digit).ok()?;
    value.checked_mul(10)?.checked_sub(digit)
}
40 
/// Check whether a byte is an ASCII decimal digit (`b'0'..=b'9'`).
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
46 
/// Split `digits` into `(digits[..index], digits[index..])`.
///
/// # Panics
///
/// Panics if `index > digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    digits.split_at(index)
}
52 
53 /// Consume until a an invalid digit is found.
54 ///
55 /// - `digits`      - Slice containing 0 or more digits.
56 #[inline]
consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8])57 fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
58     // Consume all digits.
59     let mut index = 0;
60     while index < digits.len() && is_digit(digits[index]) {
61         index += 1;
62     }
63     split_at_index(digits, index)
64 }
65 
/// Drop every leading `b'0'` from `bytes`.
///
/// Returns the sub-slice starting at the first byte that is not `b'0'`, or an
/// empty slice if `bytes` consists solely of zeros.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    match bytes.iter().position(|&byte| byte != b'0') {
        Some(start) => &bytes[start..],
        None => &bytes[bytes.len()..],
    }
}
72 
/// Drop every trailing `b'0'` from `bytes`.
///
/// Returns the sub-slice ending at the last byte that is not `b'0'`, or an
/// empty slice if `bytes` consists solely of zeros.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    match bytes.iter().rposition(|&byte| byte != b'0') {
        Some(last) => &bytes[..=last],
        None => &bytes[..0],
    }
}
80 
81 // PARSERS
82 // -------
83 
84 /// Parse the exponent of the float.
85 ///
86 /// * `exponent`    - Slice containing the exponent digits.
87 /// * `is_positive` - If the exponent sign is positive.
parse_exponent(exponent: &[u8], is_positive: bool) -> i3288 fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
89     // Parse the sign bit or current data.
90     let mut value: i32 = 0;
91     match is_positive {
92         true => {
93             for c in exponent {
94                 value = match add_digit_i32(value, to_digit(*c).unwrap()) {
95                     Some(v) => v,
96                     None => return i32::max_value(),
97                 };
98             }
99         },
100         false => {
101             for c in exponent {
102                 value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
103                     Some(v) => v,
104                     None => return i32::min_value(),
105                 };
106             }
107         },
108     }
109 
110     value
111 }
112 
113 /// Parse float from input bytes, returning the float and the remaining bytes.
114 ///
115 /// * `bytes`    - Array of bytes leading with float-data.
parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8]) where F: minimal_lexical::Float,116 fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
117 where
118     F: minimal_lexical::Float,
119 {
120     // Parse the sign.
121     let (is_positive, bytes) = parse_sign(bytes);
122 
123     // Extract and parse the float components:
124     let (integer_slc, bytes) = consume_digits(bytes);
125     let (fraction_slc, bytes) = match bytes.first() {
126         Some(&b'.') => consume_digits(&bytes[1..]),
127         _ => (&bytes[..0], bytes),
128     };
129     let (exponent, bytes) = match bytes.first() {
130         Some(&b'e') | Some(&b'E') => {
131             // Extract and parse the exponent.
132             let (is_positive, bytes) = parse_sign(&bytes[1..]);
133             let (exponent, bytes) = consume_digits(bytes);
134             (parse_exponent(exponent, is_positive), bytes)
135         },
136         _ => (0, bytes),
137     };
138 
139     // Trim leading and trailing zeros.
140     let integer_slc = ltrim_zero(integer_slc);
141     let fraction_slc = rtrim_zero(fraction_slc);
142 
143     // Create the float and return our data.
144     let mut float: F =
145         minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent);
146     if !is_positive {
147         float = -float;
148     }
149 
150     (float, bytes)
151 }
152 
153 // PATH
154 
/// Return the directory that contains the running executable.
///
/// For a Cargo build this is expected to be `target/debug` or
/// `target/release`.
///
/// # Panics
///
/// Panics if the executable path cannot be determined or has no parent.
pub fn build_dir() -> PathBuf {
    let exe = env::current_exe().expect("unittest executable path");
    let dir = exe.parent().expect("debug/release directory");
    PathBuf::from(dir)
}
163 
164 /// Return the `target` directory path.
target_dir() -> PathBuf165 pub fn target_dir() -> PathBuf {
166     build_dir().parent().expect("target directory").to_path_buf()
167 }
168 
169 /// Return the project directory path.
project_dir() -> PathBuf170 pub fn project_dir() -> PathBuf {
171     target_dir().parent().expect("project directory").to_path_buf()
172 }
173 
174 /// Return the `data` directory path.
data_dir() -> PathBuf175 pub fn data_dir() -> PathBuf {
176     let mut dir = project_dir();
177     dir.push("test-parse-golang");
178     dir.push("parse-number-fxx-test-data");
179     dir.push("data");
180     dir
181 }
182 
run_test(line: &str)183 fn run_test(line: &str) {
184     // Tests have the following format:
185     //      hhhh ssssssss dddddddddddddddddd ....
186     // The `hhhh` part is the hexadecimal representation for f16,
187     // the `ssssssss` part is the hexadecimal representation of f32,
188     // the `dddddddddddddddddd` is the hex representation of f64,
189     // and the remaining bytes are the string to parse.
190     let hex32 = line[5..13].to_lowercase();
191     let hex64 = line[14..30].to_lowercase();
192     let string = &line[31..];
193 
194     let float32: f32 = parse_float(string.as_bytes()).0;
195     let float64: f64 = parse_float(string.as_bytes()).0;
196     assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
197     assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
198 }
199 
main()200 fn main() {
201     // Iterate over all .txt files in the directory.
202     let paths = fs::read_dir(data_dir()).expect("Please update the Git submodule");
203     for direntry in paths {
204         let path = direntry.unwrap().path();
205         if path.extension().unwrap() == "txt" {
206             // Have a data file, parse and run the tests.
207             let filename = path.file_name().unwrap().to_str().unwrap();
208             println!("Running Test: {}", filename);
209             let file = fs::File::open(path).unwrap();
210             let reader = io::BufReader::new(file);
211             let mut count: usize = 0;
212             for line in reader.lines() {
213                 let line = line.unwrap();
214                 run_test(&line);
215                 count += 1;
216             }
217             println!("Ran {} tests.", count);
218         }
219     }
220 }
221