nautilus_core/
parsing.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Core parsing functions.
17
18/// Clamps a length to `u8::MAX` with optional debug logging.
19#[inline]
20#[must_use]
21#[allow(
22    clippy::cast_possible_truncation,
23    reason = "Intentional for parsing, value range validated"
24)]
25fn clamp_precision_with_log(len: usize, context: &str, input: &str) -> u8 {
26    if len > u8::MAX as usize {
27        log::debug!(
28            "{} precision clamped from {} to {} for input: {}",
29            context,
30            len,
31            u8::MAX,
32            input
33        );
34    }
35    len.min(u8::MAX as usize) as u8
36}
37
/// Parses the text following `e-` in scientific notation into a precision, saturating at
/// `u8::MAX`.
///
/// - A numeric exponent yields `Some(value)` clamped to 255. An all-digit exponent that is too
///   large for `u64` also yields `Some(u8::MAX)`.
/// - An empty or non-numeric exponent yields `None` when `strict` is false.
///
/// Parsing is delegated to `u64::from_str`, so a single leading `+` is tolerated.
///
/// # Panics
///
/// Panics when `strict` is true and the exponent is empty or non-numeric.
#[inline]
#[must_use]
fn parse_scientific_exponent(exponent_str: &str, strict: bool) -> Option<u8> {
    // Handle the empty exponent first: `all()` on an empty iterator is vacuously true, so
    // without this guard an empty exponent would be mistaken for an overflowing number.
    if exponent_str.is_empty() {
        assert!(
            !strict,
            "Invalid scientific notation format: missing exponent after 'e-'"
        );
        return None;
    }

    match exponent_str.parse::<u64>() {
        Ok(exp) => Some(u8::try_from(exp).unwrap_or(u8::MAX)),
        // All digits but too large for u64: clamp to u8::MAX
        Err(_) if exponent_str.bytes().all(|b| b.is_ascii_digit()) => Some(u8::MAX),
        Err(_) => {
            assert!(
                !strict,
                "Invalid scientific notation exponent '{exponent_str}': must be a valid number"
            );
            None // Lenient parsing reports the failure to the caller
        }
    }
}
62
63/// Returns the decimal precision inferred from the given string.
64///
65/// For scientific notation with large negative exponents (e.g., "1e-300", "1e-4294967296"),
66/// the precision is clamped to `u8::MAX` (255) since that represents the maximum representable
67/// precision in this system. This handles arbitrarily large exponents without panicking.
68///
69/// # Panics
70///
71/// Panics if the input string is malformed (e.g., "1e-" with no exponent value, or non-numeric
72/// exponents like "1e-abc").
73#[must_use]
74#[allow(
75    clippy::cast_possible_truncation,
76    reason = "Intentional for parsing, value range validated"
77)]
78pub fn precision_from_str(s: &str) -> u8 {
79    let s = s.trim().to_ascii_lowercase();
80
81    // Check for scientific notation
82    if s.contains("e-") {
83        let exponent_str = s
84            .split("e-")
85            .nth(1)
86            .expect("Invalid scientific notation format: missing exponent after 'e-'");
87
88        return parse_scientific_exponent(exponent_str, true)
89            .expect("parse_scientific_exponent should return Some in strict mode");
90    }
91
92    // Check for decimal precision
93    if let Some((_, decimal_part)) = s.split_once('.') {
94        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
95    } else {
96        0
97    }
98}
99
100/// Returns the minimum increment precision inferred from the given string,
101/// ignoring trailing zeros.
102///
103/// For scientific notation with large negative exponents (e.g., "1e-300"), the precision
104/// is clamped to `u8::MAX` (255) to match the behavior of `precision_from_str`.
105#[must_use]
106#[allow(
107    clippy::cast_possible_truncation,
108    reason = "Intentional for parsing, value range validated"
109)]
110pub fn min_increment_precision_from_str(s: &str) -> u8 {
111    let s = s.trim().to_ascii_lowercase();
112
113    // Check for scientific notation
114    if let Some(pos) = s.find('e')
115        && s[pos + 1..].starts_with('-')
116    {
117        let exponent_str = &s[pos + 2..];
118        // Use lenient parsing (returns 0 for invalid, doesn't panic)
119        return parse_scientific_exponent(exponent_str, false).unwrap_or(0);
120    }
121
122    // Check for decimal precision
123    if let Some(dot_pos) = s.find('.') {
124        let decimal_part = &s[dot_pos + 1..];
125        if decimal_part.chars().any(|c| c != '0') {
126            let trimmed_len = decimal_part.trim_end_matches('0').len();
127            return clamp_precision_with_log(trimmed_len, "Minimum increment", &s);
128        }
129        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
130    } else {
131        0
132    }
133}
134
135/// Returns a `usize` from the given bytes.
136///
137/// # Errors
138///
139/// Returns an error if there are not enough bytes to represent a `usize`.
140pub fn bytes_to_usize(bytes: &[u8]) -> anyhow::Result<usize> {
141    // Check bytes width
142    if bytes.len() >= std::mem::size_of::<usize>() {
143        let mut buffer = [0u8; std::mem::size_of::<usize>()];
144        buffer.copy_from_slice(&bytes[..std::mem::size_of::<usize>()]);
145
146        Ok(usize::from_le_bytes(buffer))
147    } else {
148        anyhow::bail!("Not enough bytes to represent a `usize`");
149    }
150}
151
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    fn test_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(precision_from_str(s), expected);
    }

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    #[case("1.010", 2)]
    #[case("1.00100", 3)]
    #[case("0.0001000", 4)]
    #[case("1.000000000", 9)]
    fn test_min_increment_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(min_increment_precision_from_str(s), expected);
    }

    #[rstest]
    fn test_bytes_to_usize_empty() {
        let payload: Vec<u8> = vec![];
        let result = bytes_to_usize(&payload);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().to_string(),
            "Not enough bytes to represent a `usize`"
        );
    }

    #[rstest]
    fn test_bytes_to_usize_invalid() {
        // One byte short of a `usize`, whatever the target's pointer width
        let payload = vec![0x01_u8; std::mem::size_of::<usize>() - 1];
        let result = bytes_to_usize(&payload);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().to_string(),
            "Not enough bytes to represent a `usize`"
        );
    }

    // The expected literals below only fit an 8-byte `usize`
    #[cfg(target_pointer_width = "64")]
    #[rstest]
    fn test_bytes_to_usize_valid() {
        let payload: Vec<u8> = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let result = bytes_to_usize(&payload).unwrap();
        assert_eq!(result, 0x0807_0605_0403_0201);
        assert_eq!(result, 578_437_695_752_307_201);
    }

    #[rstest]
    fn test_bytes_to_usize_round_trip() {
        // Width-independent check of the little-endian decoding
        let payload = 0x0201_usize.to_le_bytes();
        assert_eq!(bytes_to_usize(&payload).unwrap(), 0x0201);
    }

    #[rstest]
    fn test_bytes_to_usize_ignores_trailing_bytes() {
        // Bytes beyond the width of a `usize` must not affect the result
        let mut payload = 42_usize.to_le_bytes().to_vec();
        payload.extend_from_slice(&[0xFF, 0xFF, 0xFF]);
        assert_eq!(bytes_to_usize(&payload).unwrap(), 42);
    }

    #[rstest]
    fn test_precision_from_str_large_exponent_clamped() {
        // u8::MAX is 255, so 999 should be clamped to 255
        let result = precision_from_str("1e-999");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_precision_from_str_very_large_exponent_clamped() {
        // Very large exponents should also be clamped to u8::MAX
        let result = precision_from_str("1e-300");
        assert_eq!(result, 255);

        let result = precision_from_str("1e-1000000");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_precision_from_str_long_fraction_clamped() {
        // More than 255 fractional digits should be clamped to u8::MAX
        let input = format!("0.{}", "1".repeat(300));
        assert_eq!(precision_from_str(&input), 255);
        assert_eq!(min_increment_precision_from_str(&input), 255);
    }

    #[rstest]
    #[should_panic(expected = "Invalid scientific notation exponent")]
    fn test_precision_from_str_invalid_exponent_not_numeric() {
        let _ = precision_from_str("1e-abc");
    }

    #[rstest]
    #[should_panic(expected = "missing exponent after 'e-'")]
    fn test_precision_from_str_malformed_scientific_notation() {
        // "1e-" with empty exponent should panic (fail fast on malformed input)
        let _ = precision_from_str("1e-");
    }

    #[rstest]
    fn test_precision_from_str_edge_case_max_u8() {
        // u8::MAX = 255, should work
        let result = precision_from_str("1e-255");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_precision_from_str_just_above_max_u8() {
        // 256 should be clamped to 255
        let result = precision_from_str("1e-256");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_precision_from_str_u32_overflow() {
        // Exponent of u32::MAX + 1 (4294967296) should be clamped to 255
        let result = precision_from_str("1e-4294967296");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_precision_from_str_u64_overflow() {
        // Exponent > u64::MAX should be clamped to 255
        let result = precision_from_str("1e-99999999999999999999");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_large_exponent() {
        // Large exponents should be clamped to u8::MAX (255), not return 0
        let result = min_increment_precision_from_str("1e-300");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_very_large_exponent() {
        // Very large exponents should also be clamped to 255
        let result = min_increment_precision_from_str("1e-99999999999999999999");
        assert_eq!(result, 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_invalid_exponent_is_lenient() {
        // Unlike `precision_from_str`, a non-numeric exponent yields 0 instead of panicking
        let result = min_increment_precision_from_str("1e-abc");
        assert_eq!(result, 0);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_consistency() {
        // Should match precision_from_str for large exponents
        let input = "1e-1000";
        let precision = precision_from_str(input);
        let min_precision = min_increment_precision_from_str(input);
        assert_eq!(precision, min_precision);
        assert_eq!(precision, 255);
    }
}
317}