nautilus_core/
parsing.rs

// -------------------------------------------------------------------------------------------------
//  Copyright (C) 2015-2026 Nautech Systems Pty Ltd. All rights reserved.
//  https://nautechsystems.io
//
//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
//  You may not use this file except in compliance with the License.
//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
// -------------------------------------------------------------------------------------------------

//! Core parsing functions.
17
18/// Clamps a length to `u8::MAX` with optional debug logging.
19#[inline]
20#[must_use]
21#[allow(
22    clippy::cast_possible_truncation,
23    reason = "Intentional for parsing, value range validated"
24)]
25fn clamp_precision_with_log(len: usize, context: &str, input: &str) -> u8 {
26    if len > u8::MAX as usize {
27        log::debug!(
28            "{} precision clamped from {} to {} for input: {}",
29            context,
30            len,
31            u8::MAX,
32            input
33        );
34    }
35    len.min(u8::MAX as usize) as u8
36}
37
/// Parses the digits that follow `e-` in scientific notation, clamping to `u8::MAX`.
///
/// Only non-empty strings made entirely of ASCII digits are accepted. The input is
/// validated *before* calling `str::parse::<u64>`, because that parser also tolerates a
/// leading `+` sign and would otherwise let malformed input such as `"1e-+5"` through.
///
/// Returns:
/// - `Some(n)` for a digit-only exponent, where `n` is the value saturated at `u8::MAX`
///   (this includes values too large to fit in a `u64`).
/// - `None` when `strict` is false and the exponent is empty or contains a non-digit.
///
/// # Panics
///
/// Panics when `strict` is true and the exponent is empty or contains a non-digit.
#[inline]
#[must_use]
fn parse_scientific_exponent(exponent_str: &str, strict: bool) -> Option<u8> {
    // Empty string is invalid (not a large number that overflowed)
    if exponent_str.is_empty() {
        assert!(
            !strict,
            "Invalid scientific notation format: missing exponent after 'e-'"
        );
        return None;
    }

    // Reject signs, whitespace, letters, etc. up front
    if !exponent_str.bytes().all(|b| b.is_ascii_digit()) {
        if strict {
            panic!("Invalid scientific notation exponent '{exponent_str}': must be a valid number")
        }
        return None;
    }

    // From here the only possible parse failure is u64 overflow, which (like any value
    // above `u8::MAX`) saturates to `u8::MAX`
    let clamped = exponent_str
        .parse::<u64>()
        .ok()
        .and_then(|exp| u8::try_from(exp).ok())
        .unwrap_or(u8::MAX);

    Some(clamped)
}
68
69/// Returns the decimal precision inferred from the given string.
70///
71/// For scientific notation with large negative exponents (e.g., "1e-300", "1e-4294967296"),
72/// the precision is clamped to `u8::MAX` (255) since that represents the maximum representable
73/// precision in this system. This handles arbitrarily large exponents without panicking.
74///
75/// # Panics
76///
77/// Panics if the input string is malformed (e.g., "1e-" with no exponent value, or non-numeric
78/// exponents like "1e-abc").
79#[must_use]
80#[allow(
81    clippy::cast_possible_truncation,
82    reason = "Intentional for parsing, value range validated"
83)]
84pub fn precision_from_str(s: &str) -> u8 {
85    let s = s.trim().to_ascii_lowercase();
86
87    // Check for scientific notation
88    if s.contains("e-") {
89        let exponent_str = s
90            .split("e-")
91            .nth(1)
92            .expect("Invalid scientific notation format: missing exponent after 'e-'");
93
94        return parse_scientific_exponent(exponent_str, true)
95            .expect("parse_scientific_exponent should return Some in strict mode");
96    }
97
98    // Check for decimal precision
99    if let Some((_, decimal_part)) = s.split_once('.') {
100        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
101    } else {
102        0
103    }
104}
105
106/// Returns the minimum increment precision inferred from the given string,
107/// ignoring trailing zeros.
108///
109/// For scientific notation with large negative exponents (e.g., "1e-300"), the precision
110/// is clamped to `u8::MAX` (255) to match the behavior of `precision_from_str`.
111#[must_use]
112#[allow(
113    clippy::cast_possible_truncation,
114    reason = "Intentional for parsing, value range validated"
115)]
116pub fn min_increment_precision_from_str(s: &str) -> u8 {
117    let s = s.trim().to_ascii_lowercase();
118
119    // Check for scientific notation
120    if let Some(pos) = s.find('e')
121        && s[pos + 1..].starts_with('-')
122    {
123        let exponent_str = &s[pos + 2..];
124        // Use lenient parsing (returns 0 for invalid, doesn't panic)
125        return parse_scientific_exponent(exponent_str, false).unwrap_or(0);
126    }
127
128    // Check for decimal precision
129    if let Some(dot_pos) = s.find('.') {
130        let decimal_part = &s[dot_pos + 1..];
131        if decimal_part.chars().any(|c| c != '0') {
132            let trimmed_len = decimal_part.trim_end_matches('0').len();
133            return clamp_precision_with_log(trimmed_len, "Minimum increment", &s);
134        }
135        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
136    } else {
137        0
138    }
139}
140
141/// Returns a `usize` from the given bytes.
142///
143/// # Errors
144///
145/// Returns an error if there are not enough bytes to represent a `usize`.
146pub fn bytes_to_usize(bytes: &[u8]) -> anyhow::Result<usize> {
147    // Check bytes width
148    if bytes.len() >= std::mem::size_of::<usize>() {
149        let mut buffer = [0u8; std::mem::size_of::<usize>()];
150        buffer.copy_from_slice(&bytes[..std::mem::size_of::<usize>()]);
151
152        Ok(usize::from_le_bytes(buffer))
153    } else {
154        anyhow::bail!("Not enough bytes to represent a `usize`");
155    }
156}
157
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Table-driven check of `precision_from_str` over plain, signed, padded and
    /// scientific-notation inputs.
    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    fn test_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(precision_from_str(s), expected);
    }

    /// Table-driven check of `min_increment_precision_from_str`, including inputs with
    /// trailing zeros and an all-zero fractional part.
    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    #[case("1.010", 2)]
    #[case("1.00100", 3)]
    #[case("0.0001000", 4)]
    #[case("1.000000000", 9)]
    fn test_min_increment_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(min_increment_precision_from_str(s), expected);
    }

    /// An empty slice cannot hold a `usize`.
    #[rstest]
    fn test_bytes_to_usize_empty() {
        let empty: [u8; 0] = [];
        let outcome = bytes_to_usize(&empty);
        assert!(outcome.is_err());
        assert_eq!(
            outcome.unwrap_err().to_string(),
            "Not enough bytes to represent a `usize`"
        );
    }

    /// Three bytes are too few to hold a `usize`.
    #[rstest]
    fn test_bytes_to_usize_invalid() {
        let too_short = [0x01_u8, 0x02, 0x03];
        let outcome = bytes_to_usize(&too_short);
        assert!(outcome.is_err());
        assert_eq!(
            outcome.unwrap_err().to_string(),
            "Not enough bytes to represent a `usize`"
        );
    }

    /// Eight bytes decode as a little-endian value.
    ///
    /// NOTE(review): the expected literals need 64 bits, so this presumably only targets
    /// platforms with a 64-bit `usize` — confirm.
    #[rstest]
    fn test_bytes_to_usize_valid() {
        let eight_bytes = [0x01_u8, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let decoded = bytes_to_usize(&eight_bytes).unwrap();
        assert_eq!(decoded, 0x0807_0605_0403_0201);
        assert_eq!(decoded, 578_437_695_752_307_201);
    }

    #[rstest]
    fn test_precision_from_str_large_exponent_clamped() {
        // u8::MAX is 255, so 999 should be clamped to 255
        assert_eq!(precision_from_str("1e-999"), 255);
    }

    #[rstest]
    fn test_precision_from_str_very_large_exponent_clamped() {
        // Very large exponents should also be clamped to u8::MAX
        assert_eq!(precision_from_str("1e-300"), 255);
        assert_eq!(precision_from_str("1e-1000000"), 255);
    }

    #[rstest]
    #[should_panic(expected = "Invalid scientific notation exponent")]
    fn test_precision_from_str_invalid_exponent_not_numeric() {
        let _ = precision_from_str("1e-abc");
    }

    #[rstest]
    #[should_panic(expected = "missing exponent after 'e-'")]
    fn test_precision_from_str_malformed_scientific_notation() {
        // "1e-" with empty exponent should panic (fail fast on malformed input)
        let _ = precision_from_str("1e-");
    }

    #[rstest]
    fn test_precision_from_str_edge_case_max_u8() {
        // u8::MAX = 255, should work
        assert_eq!(precision_from_str("1e-255"), 255);
    }

    #[rstest]
    fn test_precision_from_str_just_above_max_u8() {
        // 256 should be clamped to 255
        assert_eq!(precision_from_str("1e-256"), 255);
    }

    #[rstest]
    fn test_precision_from_str_u32_overflow() {
        // Exponent > u32::MAX (4294967296) should be clamped to 255
        assert_eq!(precision_from_str("1e-4294967296"), 255);
    }

    #[rstest]
    fn test_precision_from_str_u64_overflow() {
        // Exponent > u64::MAX should be clamped to 255
        assert_eq!(precision_from_str("1e-99999999999999999999"), 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_large_exponent() {
        // Large exponents should be clamped to u8::MAX (255), not return 0
        assert_eq!(min_increment_precision_from_str("1e-300"), 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_very_large_exponent() {
        // Very large exponents should also be clamped to 255
        assert_eq!(
            min_increment_precision_from_str("1e-99999999999999999999"),
            255
        );
    }

    #[rstest]
    fn test_min_increment_precision_from_str_consistency() {
        // Should match precision_from_str for large exponents
        let input = "1e-1000";
        let full = precision_from_str(input);
        let minimal = min_increment_precision_from_str(input);
        assert_eq!(full, minimal);
        assert_eq!(full, 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_empty_exponent() {
        // Empty exponent should return 0, not u8::MAX
        assert_eq!(min_increment_precision_from_str("1e-"), 0);
    }
}
nautilus_core/parsing.rs

nautilus_core/
parsing.rs