1 // Copyright 2021, Alex Huszagh. Unlicensed.
2 // See https://unlicense.org/
3
4 use std::io::prelude::*;
5 use std::path::PathBuf;
6 use std::{env, fs, io};
7
8 // HELPERS
9 // -------
10
// These functions are simple, reusable components.
12
/// Strip an optional leading sign from `bytes`.
///
/// Returns `(is_positive, rest)`:
/// * a leading `+` yields `true` and the bytes after it,
/// * a leading `-` yields `false` and the bytes after it,
/// * anything else (including empty input) yields `true` and `bytes` untouched.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    match bytes.split_first() {
        Some((&b'+', rest)) => (true, rest),
        Some((&b'-', rest)) => (false, rest),
        // No sign present: the value is implicitly positive.
        _ => (true, bytes),
    }
}
22
/// Convert an ASCII byte to its decimal digit value.
///
/// Returns `Some(0..=9)` for the bytes `b'0'..=b'9'` and `None` for every
/// other byte.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
28
/// Append `digit` to a non-negative accumulator: computes `value * 10 + digit`.
///
/// Returns `None` if either the multiplication or the addition overflows
/// `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_add(digit as i32))
}
34
/// Append `digit` to a non-positive accumulator: computes `value * 10 - digit`.
///
/// Returns `None` if either the multiplication or the subtraction overflows
/// `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_sub(digit as i32))
}
40
/// Check whether a byte is an ASCII decimal digit (`b'0'..=b'9'`).
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
46
/// Split `digits` into `(digits[..index], digits[index..])`.
///
/// Panics if `index` is greater than `digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    digits.split_at(index)
}
52
53 /// Consume until a an invalid digit is found.
54 ///
55 /// - `digits` - Slice containing 0 or more digits.
56 #[inline]
consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8])57 fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
58 // Consume all digits.
59 let mut index = 0;
60 while index < digits.len() && is_digit(digits[index]) {
61 index += 1;
62 }
63 split_at_index(digits, index)
64 }
65
/// Trim leading `b'0'` bytes.
///
/// Returns the sub-slice starting at the first byte that is not `b'0'`, or an
/// empty slice when every byte is `b'0'`.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    let start = bytes
        .iter()
        .position(|&byte| byte != b'0')
        .unwrap_or(bytes.len());
    &bytes[start..]
}
72
/// Trim trailing `b'0'` bytes.
///
/// Returns the sub-slice ending just after the last byte that is not `b'0'`,
/// or an empty slice when every byte is `b'0'`.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    let end = bytes
        .iter()
        .rposition(|&byte| byte != b'0')
        .map_or(0, |last| last + 1);
    &bytes[..end]
}
80
81 // PARSERS
82 // -------
83
84 /// Parse the exponent of the float.
85 ///
86 /// * `exponent` - Slice containing the exponent digits.
87 /// * `is_positive` - If the exponent sign is positive.
parse_exponent(exponent: &[u8], is_positive: bool) -> i3288 fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
89 // Parse the sign bit or current data.
90 let mut value: i32 = 0;
91 match is_positive {
92 true => {
93 for c in exponent {
94 value = match add_digit_i32(value, to_digit(*c).unwrap()) {
95 Some(v) => v,
96 None => return i32::max_value(),
97 };
98 }
99 },
100 false => {
101 for c in exponent {
102 value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
103 Some(v) => v,
104 None => return i32::min_value(),
105 };
106 }
107 },
108 }
109
110 value
111 }
112
113 /// Parse float from input bytes, returning the float and the remaining bytes.
114 ///
115 /// * `bytes` - Array of bytes leading with float-data.
parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8]) where F: minimal_lexical::Float,116 fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
117 where
118 F: minimal_lexical::Float,
119 {
120 // Parse the sign.
121 let (is_positive, bytes) = parse_sign(bytes);
122
123 // Extract and parse the float components:
124 let (integer_slc, bytes) = consume_digits(bytes);
125 let (fraction_slc, bytes) = match bytes.first() {
126 Some(&b'.') => consume_digits(&bytes[1..]),
127 _ => (&bytes[..0], bytes),
128 };
129 let (exponent, bytes) = match bytes.first() {
130 Some(&b'e') | Some(&b'E') => {
131 // Extract and parse the exponent.
132 let (is_positive, bytes) = parse_sign(&bytes[1..]);
133 let (exponent, bytes) = consume_digits(bytes);
134 (parse_exponent(exponent, is_positive), bytes)
135 },
136 _ => (0, bytes),
137 };
138
139 // Trim leading and trailing zeros.
140 let integer_slc = ltrim_zero(integer_slc);
141 let fraction_slc = rtrim_zero(fraction_slc);
142
143 // Create the float and return our data.
144 let mut float: F =
145 minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent);
146 if !is_positive {
147 float = -float;
148 }
149
150 (float, bytes)
151 }
152
153 // PATH
154
/// Return the directory containing the running executable (expected to be
/// `target/debug` or `target/release`).
///
/// Panics if the executable path cannot be determined or has no parent.
pub fn build_dir() -> PathBuf {
    let exe = env::current_exe().expect("unittest executable path");
    let dir = exe.parent().expect("debug/release directory");
    dir.to_path_buf()
}
163
164 /// Return the `target` directory path.
target_dir() -> PathBuf165 pub fn target_dir() -> PathBuf {
166 build_dir().parent().expect("target directory").to_path_buf()
167 }
168
169 /// Return the project directory path.
project_dir() -> PathBuf170 pub fn project_dir() -> PathBuf {
171 target_dir().parent().expect("project directory").to_path_buf()
172 }
173
174 /// Return the `data` directory path.
data_dir() -> PathBuf175 pub fn data_dir() -> PathBuf {
176 let mut dir = project_dir();
177 dir.push("test-parse-golang");
178 dir.push("parse-number-fxx-test-data");
179 dir.push("data");
180 dir
181 }
182
run_test(line: &str)183 fn run_test(line: &str) {
184 // Tests have the following format:
185 // hhhh ssssssss dddddddddddddddddd ....
186 // The `hhhh` part is the hexadecimal representation for f16,
187 // the `ssssssss` part is the hexadecimal representation of f32,
188 // the `dddddddddddddddddd` is the hex representation of f64,
189 // and the remaining bytes are the string to parse.
190 let hex32 = line[5..13].to_lowercase();
191 let hex64 = line[14..30].to_lowercase();
192 let string = &line[31..];
193
194 let float32: f32 = parse_float(string.as_bytes()).0;
195 let float64: f64 = parse_float(string.as_bytes()).0;
196 assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
197 assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
198 }
199
main()200 fn main() {
201 // Iterate over all .txt files in the directory.
202 let paths = fs::read_dir(data_dir()).expect("Please update the Git submodule");
203 for direntry in paths {
204 let path = direntry.unwrap().path();
205 if path.extension().unwrap() == "txt" {
206 // Have a data file, parse and run the tests.
207 let filename = path.file_name().unwrap().to_str().unwrap();
208 println!("Running Test: {}", filename);
209 let file = fs::File::open(path).unwrap();
210 let reader = io::BufReader::new(file);
211 let mut count: usize = 0;
212 for line in reader.lines() {
213 let line = line.unwrap();
214 run_test(&line);
215 count += 1;
216 }
217 println!("Ran {} tests.", count);
218 }
219 }
220 }
221