nautilus_core/
parsing.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Core parsing functions.
17
18/// Clamps a length to `u8::MAX` with optional debug logging.
19#[inline]
20#[must_use]
21#[allow(clippy::cast_possible_truncation)]
22fn clamp_precision_with_log(len: usize, context: &str, input: &str) -> u8 {
23    if len > u8::MAX as usize {
24        log::debug!(
25            "{} precision clamped from {} to {} for input: {}",
26            context,
27            len,
28            u8::MAX,
29            input
30        );
31    }
32    len.min(u8::MAX as usize) as u8
33}
34
/// Parses the digits following `e-` in scientific notation, saturating at `u8::MAX`.
///
/// - A value that parses as a `u64` is returned clamped to `u8::MAX`.
/// - A string made only of ASCII digits that overflows `u64` is also clamped to `u8::MAX`.
/// - An empty or otherwise unparseable string yields `None` when `strict` is `false`.
///
/// # Panics
///
/// Panics when `strict` is `true` and `exponent_str` is empty or is not a valid number.
#[inline]
#[must_use]
fn parse_scientific_exponent(exponent_str: &str, strict: bool) -> Option<u8> {
    // Reject the empty exponent up front: `all()` over an empty iterator is vacuously
    // true, so it would otherwise be mistaken for an overflowing digit string and
    // reported as `u8::MAX` in lenient mode.
    if exponent_str.is_empty() {
        assert!(
            !strict,
            "Invalid scientific notation format: missing exponent after 'e-'"
        );
        return None;
    }

    match exponent_str.parse::<u64>() {
        // Saturating conversion, no truncating cast required
        Ok(exp) => Some(u8::try_from(exp).unwrap_or(u8::MAX)),
        // Only digits yet unparseable: the value overflowed `u64`, so clamp it
        Err(_) if exponent_str.bytes().all(|b| b.is_ascii_digit()) => Some(u8::MAX),
        Err(_) if strict => panic!(
            "Invalid scientific notation exponent '{}': must be a valid number",
            exponent_str
        ),
        // Lenient parsing reports malformed input as `None`
        Err(_) => None,
    }
}
62
63/// Returns the decimal precision inferred from the given string.
64///
65/// For scientific notation with large negative exponents (e.g., "1e-300", "1e-4294967296"),
66/// the precision is clamped to `u8::MAX` (255) since that represents the maximum representable
67/// precision in this system. This handles arbitrarily large exponents without panicking.
68///
69/// # Panics
70///
71/// Panics if the input string is malformed (e.g., "1e-" with no exponent value, or non-numeric
72/// exponents like "1e-abc").
73#[must_use]
74#[allow(clippy::cast_possible_truncation)]
75pub fn precision_from_str(s: &str) -> u8 {
76    let s = s.trim().to_ascii_lowercase();
77
78    // Check for scientific notation
79    if s.contains("e-") {
80        let exponent_str = s
81            .split("e-")
82            .nth(1)
83            .expect("Invalid scientific notation format: missing exponent after 'e-'");
84
85        return parse_scientific_exponent(exponent_str, true)
86            .expect("parse_scientific_exponent should return Some in strict mode");
87    }
88
89    // Check for decimal precision
90    if let Some((_, decimal_part)) = s.split_once('.') {
91        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
92    } else {
93        0
94    }
95}
96
97/// Returns the minimum increment precision inferred from the given string,
98/// ignoring trailing zeros.
99///
100/// For scientific notation with large negative exponents (e.g., "1e-300"), the precision
101/// is clamped to `u8::MAX` (255) to match the behavior of `precision_from_str`.
102#[must_use]
103#[allow(clippy::cast_possible_truncation)]
104pub fn min_increment_precision_from_str(s: &str) -> u8 {
105    let s = s.trim().to_ascii_lowercase();
106
107    // Check for scientific notation
108    if let Some(pos) = s.find('e')
109        && s[pos + 1..].starts_with('-')
110    {
111        let exponent_str = &s[pos + 2..];
112        // Use lenient parsing (returns 0 for invalid, doesn't panic)
113        return parse_scientific_exponent(exponent_str, false).unwrap_or(0);
114    }
115
116    // Check for decimal precision
117    if let Some(dot_pos) = s.find('.') {
118        let decimal_part = &s[dot_pos + 1..];
119        if decimal_part.chars().any(|c| c != '0') {
120            let trimmed_len = decimal_part.trim_end_matches('0').len();
121            return clamp_precision_with_log(trimmed_len, "Minimum increment", &s);
122        }
123        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
124    } else {
125        0
126    }
127}
128
129/// Returns a `usize` from the given bytes.
130///
131/// # Errors
132///
133/// Returns an error if there are not enough bytes to represent a `usize`.
134pub fn bytes_to_usize(bytes: &[u8]) -> anyhow::Result<usize> {
135    // Check bytes width
136    if bytes.len() >= std::mem::size_of::<usize>() {
137        let mut buffer = [0u8; std::mem::size_of::<usize>()];
138        buffer.copy_from_slice(&bytes[..std::mem::size_of::<usize>()]);
139
140        Ok(usize::from_le_bytes(buffer))
141    } else {
142        anyhow::bail!("Not enough bytes to represent a `usize`");
143    }
144}
145
146////////////////////////////////////////////////////////////////////////////////
147// Tests
148////////////////////////////////////////////////////////////////////////////////
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Error text expected from `bytes_to_usize` when the payload is too short.
    const NOT_ENOUGH_BYTES: &str = "Not enough bytes to represent a `usize`";

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    fn test_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(precision_from_str(s), expected);
    }

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    #[case("1.010", 2)]
    #[case("1.00100", 3)]
    #[case("0.0001000", 4)]
    #[case("1.000000000", 9)]
    fn test_min_increment_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(min_increment_precision_from_str(s), expected);
    }

    #[rstest]
    fn test_bytes_to_usize_empty() {
        // No bytes at all cannot form a `usize`
        let err = bytes_to_usize(&[]).expect_err("an empty payload must be rejected");
        assert_eq!(err.to_string(), NOT_ENOUGH_BYTES);
    }

    #[rstest]
    fn test_bytes_to_usize_invalid() {
        // Three bytes are fewer than a `usize` needs
        let err = bytes_to_usize(&[0x01, 0x02, 0x03])
            .expect_err("a three-byte payload must be rejected");
        assert_eq!(err.to_string(), NOT_ENOUGH_BYTES);
    }

    #[rstest]
    fn test_bytes_to_usize_valid() {
        // Little-endian: the first byte is the least significant
        let payload = [0x01_u8, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let value = bytes_to_usize(&payload).unwrap();
        assert_eq!(value, 0x0807_0605_0403_0201);
        assert_eq!(value, 578_437_695_752_307_201);
    }

    #[rstest]
    fn test_precision_from_str_large_exponent_clamped() {
        // 999 exceeds u8::MAX (255) and must saturate
        assert_eq!(precision_from_str("1e-999"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_very_large_exponent_clamped() {
        // Every exponent beyond u8::MAX saturates, however large
        for input in ["1e-300", "1e-1000000"] {
            assert_eq!(precision_from_str(input), u8::MAX);
        }
    }

    #[rstest]
    #[should_panic(expected = "Invalid scientific notation exponent")]
    fn test_precision_from_str_invalid_exponent_not_numeric() {
        let _ = precision_from_str("1e-abc");
    }

    #[rstest]
    #[should_panic(expected = "missing exponent after 'e-'")]
    fn test_precision_from_str_malformed_scientific_notation() {
        // An empty exponent after "e-" is malformed and fails fast
        let _ = precision_from_str("1e-");
    }

    #[rstest]
    fn test_precision_from_str_edge_case_max_u8() {
        // Exactly u8::MAX is representable without clamping
        assert_eq!(precision_from_str("1e-255"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_just_above_max_u8() {
        // One past u8::MAX saturates to 255
        assert_eq!(precision_from_str("1e-256"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_u32_overflow() {
        // 4294967296 is u32::MAX + 1 and still saturates to 255
        assert_eq!(precision_from_str("1e-4294967296"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_u64_overflow() {
        // An exponent beyond u64::MAX saturates to 255 as well
        assert_eq!(precision_from_str("1e-99999999999999999999"), u8::MAX);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_large_exponent() {
        // Oversized exponents saturate to 255 rather than falling back to 0
        assert_eq!(min_increment_precision_from_str("1e-300"), u8::MAX);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_very_large_exponent() {
        // Exponents that overflow u64 saturate to 255 too
        assert_eq!(
            min_increment_precision_from_str("1e-99999999999999999999"),
            u8::MAX
        );
    }

    #[rstest]
    fn test_min_increment_precision_from_str_consistency() {
        // Both functions must agree on an oversized exponent
        let input = "1e-1000";
        let full = precision_from_str(input);
        let minimal = min_increment_precision_from_str(input);
        assert_eq!(full, minimal);
        assert_eq!(full, u8::MAX);
    }
}
314}