//! A simple example on how to use minimal_lexical within a parser framework.
2 //!
3 //! This works on input bytes, however, it could be easily adapted to use
4 //! `io::Read`, or any iterator over bytes. Since floats can only include
5 //! ASCII characters, it will work with UTF-8 encoded data and return
6 //! remaining bytes properly on UTF-8 boundaries.
7 //!
8 //! # License
9 //!
//! This example is unlicensed, so please adapt the code into your
11 //! own project. It is meant to show how to implement a float parser
12 //! easily and efficiently, and how to adapt it for specialized use-cases.
13 //!
14 //! ```text
15 //! This is free and unencumbered software released into the public domain.
16 //!
17 //! Anyone is free to copy, modify, publish, use, compile, sell, or
18 //! distribute this software, either in source code form or as a compiled
19 //! binary, for any purpose, commercial or non-commercial, and by any
20 //! means.
21 //!
22 //! In jurisdictions that recognize copyright laws, the author or authors
23 //! of this software dedicate any and all copyright interest in the
24 //! software to the public domain. We make this dedication for the benefit
25 //! of the public at large and to the detriment of our heirs and
26 //! successors. We intend this dedication to be an overt act of
27 //! relinquishment in perpetuity of all present and future rights to this
28 //! software under copyright law.
29 //!
30 //! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 //! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 //! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 //! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
34 //! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
35 //! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
36 //! OTHER DEALINGS IN THE SOFTWARE.
37 //!
38 //! For more information, please refer to <http://unlicense.org/>
39 //! ```
40
41 extern crate minimal_lexical;
42
43 // HELPERS
44 // -------
45
// These functions are simple, reusable components.
47
/// Strip an optional leading sign from `bytes`.
///
/// Returns `(is_positive, remaining)`. A leading `-` yields `false`; a
/// leading `+`, or no sign at all, yields `true`. When a sign byte is
/// present it is excluded from the remaining slice.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    match bytes.split_first() {
        Some((&b'+', rest)) => (true, rest),
        Some((&b'-', rest)) => (false, rest),
        // Empty input or no sign byte: positive, nothing consumed.
        _ => (true, bytes),
    }
}
57
/// Convert an ASCII byte to its decimal digit value.
///
/// Returns `Some(0)` through `Some(9)` for `b'0'` through `b'9'`, and
/// `None` for every other byte.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
63
/// Compute `value * 10 + digit` with overflow checking.
///
/// Returns `None` if either the multiplication or the addition overflows
/// an `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_add(digit as i32))
}
69
/// Compute `value * 10 - digit` with overflow checking.
///
/// Returns `None` if either the multiplication or the subtraction
/// overflows an `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_sub(digit as i32))
}
75
/// Check whether a byte is an ASCII decimal digit (`b'0'` through `b'9'`).
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
81
/// Split `digits` into the bytes before `index` and the bytes from `index`
/// onwards.
///
/// Panics if `index` is greater than `digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    digits.split_at(index)
}
87
88 /// Consume until a an invalid digit is found.
89 ///
90 /// - `digits` - Slice containing 0 or more digits.
91 #[inline]
consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8])92 fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
93 // Consume all digits.
94 let mut index = 0;
95 while index < digits.len() && is_digit(digits[index]) {
96 index += 1;
97 }
98 split_at_index(digits, index)
99 }
100
/// Trim leading `b'0'` bytes.
///
/// Returns the sub-slice starting at the first byte that is not `b'0'`,
/// or an empty slice when every byte is `b'0'`.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    match bytes.iter().position(|&b| b != b'0') {
        Some(first_nonzero) => &bytes[first_nonzero..],
        // All zeros (or empty input): nothing is left.
        None => &bytes[bytes.len()..],
    }
}
107
/// Trim trailing `b'0'` bytes.
///
/// Returns the sub-slice ending just after the last byte that is not
/// `b'0'`, or an empty slice when every byte is `b'0'`.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    // One past the last non-zero byte; 0 when there is no such byte.
    let end = bytes
        .iter()
        .rposition(|&b| b != b'0')
        .map_or(0, |last_nonzero| last_nonzero + 1);
    &bytes[..end]
}
115
116 // PARSERS
117 // -------
118
119 /// Parse the exponent of the float.
120 ///
121 /// * `exponent` - Slice containing the exponent digits.
122 /// * `is_positive` - If the exponent sign is positive.
parse_exponent(exponent: &[u8], is_positive: bool) -> i32123 fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
124 // Parse the sign bit or current data.
125 let mut value: i32 = 0;
126 match is_positive {
127 true => {
128 for c in exponent {
129 value = match add_digit_i32(value, to_digit(*c).unwrap()) {
130 Some(v) => v,
131 None => return i32::max_value(),
132 };
133 }
134 },
135 false => {
136 for c in exponent {
137 value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
138 Some(v) => v,
139 None => return i32::min_value(),
140 };
141 }
142 },
143 }
144
145 value
146 }
147
148 /// Parse float from input bytes, returning the float and the remaining bytes.
149 ///
150 /// * `bytes` - Array of bytes leading with float-data.
parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8]) where F: minimal_lexical::Float,151 fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
152 where
153 F: minimal_lexical::Float,
154 {
155 // Parse the sign.
156 let (is_positive, bytes) = parse_sign(bytes);
157
158 // Note: this does not handle special float values.
159 // You will have to handle NaN, Inf, and Infinity
160 // on your own.
161
162 // Extract and parse the float components:
163 // 1. Integer
164 // 2. Fraction
165 // 3. Exponent
166 let (integer_slc, bytes) = consume_digits(bytes);
167 let (fraction_slc, bytes) = match bytes.first() {
168 Some(&b'.') => consume_digits(&bytes[1..]),
169 _ => (&bytes[..0], bytes),
170 };
171 let (exponent, bytes) = match bytes.first() {
172 Some(&b'e') | Some(&b'E') => {
173 // Extract and parse the exponent.
174 let (is_positive, bytes) = parse_sign(&bytes[1..]);
175 let (exponent, bytes) = consume_digits(bytes);
176 (parse_exponent(exponent, is_positive), bytes)
177 },
178 _ => (0, bytes),
179 };
180
181 // Note: You may want to check and validate the float data here:
182 // 1). Many floats require integer or fraction digits, if a fraction
183 // is present.
184 // 2). All floats require either integer or fraction digits.
185 // 3). Some floats do not allow a '+' sign before the significant digits.
186 // 4). Many floats require exponent digits after the exponent symbol.
187 // 5). Some floats do not allow a '+' sign before the exponent.
188
189 // We now need to trim leading and trailing 0s from the integer
190 // and fraction, respectively. This is required to make the
191 // fast and moderate paths more efficient, and for the slow
192 // path.
193 let integer_slc = ltrim_zero(integer_slc);
194 let fraction_slc = rtrim_zero(fraction_slc);
195
196 // Create the float and return our data.
197 let mut float: F =
198 minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent);
199 if !is_positive {
200 float = -float;
201 }
202
203 (float, bytes)
204 }
205
main()206 pub fn main() {
207 let check_parse_float =
208 |s: &str, v, t: &str| assert_eq!(parse_float(s.as_bytes()), (v, t.as_bytes()));
209
210 check_parse_float("1.0e7", 1.0e7f64, "");
211 check_parse_float("12345.67", 12345.67, "");
212 check_parse_float("12345.67 narnia", 12345.67, " narnia");
213 }
214