• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! A simple example of how to use minimal_lexical within a parser framework.
2 //!
3 //! This works on input bytes, however, it could be easily adapted to use
4 //! `io::Read`, or any iterator over bytes. Since floats can only include
5 //! ASCII characters, it will work with UTF-8 encoded data and return
6 //! remaining bytes properly on UTF-8 boundaries.
7 //!
8 //! # License
9 //!
10 //! This example is unlicensed, so please adapt the code into your
11 //! own project. It is meant to show how to implement a float parser
12 //! easily and efficiently, and how to adapt it for specialized use-cases.
13 //!
14 //! ```text
15 //! This is free and unencumbered software released into the public domain.
16 //!
17 //! Anyone is free to copy, modify, publish, use, compile, sell, or
18 //! distribute this software, either in source code form or as a compiled
19 //! binary, for any purpose, commercial or non-commercial, and by any
20 //! means.
21 //!
22 //! In jurisdictions that recognize copyright laws, the author or authors
23 //! of this software dedicate any and all copyright interest in the
24 //! software to the public domain. We make this dedication for the benefit
25 //! of the public at large and to the detriment of our heirs and
26 //! successors. We intend this dedication to be an overt act of
27 //! relinquishment in perpetuity of all present and future rights to this
28 //! software under copyright law.
29 //!
30 //! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 //! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 //! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 //! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
34 //! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
35 //! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
36 //! OTHER DEALINGS IN THE SOFTWARE.
37 //!
38 //! For more information, please refer to <http://unlicense.org/>
39 //! ```
40 
41 extern crate minimal_lexical;
42 
43 // HELPERS
44 // -------
45 
46 // These functions are simple, reusable components.
47 
/// Strip an optional leading sign byte from `bytes`.
///
/// Returns `(is_positive, rest)`:
///
/// * a leading `+` yields `true` and the bytes after the sign,
/// * a leading `-` yields `false` and the bytes after the sign,
/// * anything else (including empty input) yields `true` and `bytes`
///   unchanged.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    if let Some((&sign, rest)) = bytes.split_first() {
        if sign == b'+' {
            return (true, rest);
        }
        if sign == b'-' {
            return (false, rest);
        }
    }
    // No sign byte present: the value is implicitly positive.
    (true, bytes)
}
57 
/// Interpret a byte as a decimal digit.
///
/// Returns `Some(0..=9)` for the ASCII bytes `b'0'..=b'9'` and `None` for
/// every other byte value.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
63 
/// Append a decimal digit to an exponent accumulator: `value * 10 + digit`.
///
/// Returns `None` if either the multiplication or the addition overflows
/// `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_add(digit as i32))
}
69 
/// Append a decimal digit to a negative-going exponent accumulator:
/// `value * 10 - digit`.
///
/// Returns `None` if either the multiplication or the subtraction
/// overflows `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_sub(digit as i32))
}
75 
/// Check whether a byte is an ASCII decimal digit (`b'0'..=b'9'`).
///
/// This accepts exactly the bytes for which `to_digit` returns `Some`.
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
81 
/// Split `digits` into the bytes before `index` and the bytes from `index`
/// onwards.
///
/// # Panics
///
/// Panics if `index` is greater than `digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    let head = &digits[..index];
    let tail = &digits[index..];
    (head, tail)
}
87 
88 /// Consume until a an invalid digit is found.
89 ///
90 /// - `digits`      - Slice containing 0 or more digits.
91 #[inline]
consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8])92 fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
93     // Consume all digits.
94     let mut index = 0;
95     while index < digits.len() && is_digit(digits[index]) {
96         index += 1;
97     }
98     split_at_index(digits, index)
99 }
100 
/// Trim leading `b'0'` bytes.
///
/// Returns the sub-slice of `bytes` starting at the first byte that is not
/// `b'0'`; the result is empty if `bytes` is empty or consists only of
/// `b'0'` bytes.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    match bytes.iter().position(|&byte| byte != b'0') {
        Some(start) => &bytes[start..],
        // Every byte is a zero (or there are none): nothing remains.
        None => &bytes[bytes.len()..],
    }
}
107 
/// Trim trailing `b'0'` bytes.
///
/// Returns the sub-slice of `bytes` ending just after the last byte that is
/// not `b'0'`; the result is empty if `bytes` is empty or consists only of
/// `b'0'` bytes.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    // One past the last non-zero byte, or 0 when there is no such byte.
    let end = bytes
        .iter()
        .rposition(|&byte| byte != b'0')
        .map_or(0, |last| last + 1);
    &bytes[..end]
}
115 
116 // PARSERS
117 // -------
118 
119 /// Parse the exponent of the float.
120 ///
121 /// * `exponent`    - Slice containing the exponent digits.
122 /// * `is_positive` - If the exponent sign is positive.
parse_exponent(exponent: &[u8], is_positive: bool) -> i32123 fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
124     // Parse the sign bit or current data.
125     let mut value: i32 = 0;
126     match is_positive {
127         true => {
128             for c in exponent {
129                 value = match add_digit_i32(value, to_digit(*c).unwrap()) {
130                     Some(v) => v,
131                     None => return i32::max_value(),
132                 };
133             }
134         },
135         false => {
136             for c in exponent {
137                 value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
138                     Some(v) => v,
139                     None => return i32::min_value(),
140                 };
141             }
142         },
143     }
144 
145     value
146 }
147 
148 /// Parse float from input bytes, returning the float and the remaining bytes.
149 ///
150 /// * `bytes`    - Array of bytes leading with float-data.
parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8]) where F: minimal_lexical::Float,151 fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
152 where
153     F: minimal_lexical::Float,
154 {
155     // Parse the sign.
156     let (is_positive, bytes) = parse_sign(bytes);
157 
158     // Note: this does not handle special float values.
159     // You will have to handle NaN, Inf, and Infinity
160     // on your own.
161 
162     // Extract and parse the float components:
163     //  1. Integer
164     //  2. Fraction
165     //  3. Exponent
166     let (integer_slc, bytes) = consume_digits(bytes);
167     let (fraction_slc, bytes) = match bytes.first() {
168         Some(&b'.') => consume_digits(&bytes[1..]),
169         _ => (&bytes[..0], bytes),
170     };
171     let (exponent, bytes) = match bytes.first() {
172         Some(&b'e') | Some(&b'E') => {
173             // Extract and parse the exponent.
174             let (is_positive, bytes) = parse_sign(&bytes[1..]);
175             let (exponent, bytes) = consume_digits(bytes);
176             (parse_exponent(exponent, is_positive), bytes)
177         },
178         _ => (0, bytes),
179     };
180 
181     // Note: You may want to check and validate the float data here:
182     //  1). Many floats require integer or fraction digits, if a fraction
183     //      is present.
184     //  2). All floats require either integer or fraction digits.
185     //  3). Some floats do not allow a '+' sign before the significant digits.
186     //  4). Many floats require exponent digits after the exponent symbol.
187     //  5). Some floats do not allow a '+' sign before the exponent.
188 
189     // We now need to trim leading and trailing 0s from the integer
190     // and fraction, respectively. This is required to make the
191     // fast and moderate paths more efficient, and for the slow
192     // path.
193     let integer_slc = ltrim_zero(integer_slc);
194     let fraction_slc = rtrim_zero(fraction_slc);
195 
196     // Create the float and return our data.
197     let mut float: F =
198         minimal_lexical::parse_float(integer_slc.iter(), fraction_slc.iter(), exponent);
199     if !is_positive {
200         float = -float;
201     }
202 
203     (float, bytes)
204 }
205 
main()206 pub fn main() {
207     let check_parse_float =
208         |s: &str, v, t: &str| assert_eq!(parse_float(s.as_bytes()), (v, t.as_bytes()));
209 
210     check_parse_float("1.0e7", 1.0e7f64, "");
211     check_parse_float("12345.67", 12345.67, "");
212     check_parse_float("12345.67 narnia", 12345.67, " narnia");
213 }
214