nautilus_core/
parsing.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Core parsing functions.
17
18/// Clamps a length to `u8::MAX` with optional debug logging.
19#[inline]
20#[must_use]
21#[allow(clippy::cast_possible_truncation)]
22fn clamp_precision_with_log(len: usize, context: &str, input: &str) -> u8 {
23    if len > u8::MAX as usize {
24        log::debug!(
25            "{} precision clamped from {} to {} for input: {}",
26            context,
27            len,
28            u8::MAX,
29            input
30        );
31    }
32    len.min(u8::MAX as usize) as u8
33}
34
/// Parses the exponent digits that follow `e-` in scientific notation, clamping the
/// value to `u8::MAX`.
///
/// A digit string too large to fit in a `u64` is treated as "very large" and also
/// clamps to `u8::MAX`.
///
/// Returns `None` for an empty or non-numeric exponent when `strict` is false.
///
/// # Panics
///
/// Panics when `strict` is true and the exponent is empty or is not a valid number.
#[inline]
#[must_use]
fn parse_scientific_exponent(exponent_str: &str, strict: bool) -> Option<u8> {
    if exponent_str.is_empty() {
        // An empty exponent (e.g. "1e-") is malformed. It must be rejected up front:
        // `all()` is vacuously true for an empty string, so the overflow check below
        // would otherwise misreport it as a huge exponent and return `u8::MAX`.
        assert!(
            !strict,
            "Invalid scientific notation format: missing exponent after 'e-'"
        );
        return None;
    }

    match exponent_str.parse::<u64>() {
        // Saturate instead of truncating when the exponent exceeds `u8::MAX`
        Ok(exp) => Some(u8::try_from(exp).unwrap_or(u8::MAX)),
        // All digits yet unparseable means the value overflowed `u64`: clamp it
        Err(_) if exponent_str.bytes().all(|b| b.is_ascii_digit()) => Some(u8::MAX),
        Err(_) if strict => {
            panic!("Invalid scientific notation exponent '{exponent_str}': must be a valid number")
        }
        // Lenient parsing reports the malformed exponent to the caller
        Err(_) => None,
    }
}
59
60/// Returns the decimal precision inferred from the given string.
61///
62/// For scientific notation with large negative exponents (e.g., "1e-300", "1e-4294967296"),
63/// the precision is clamped to `u8::MAX` (255) since that represents the maximum representable
64/// precision in this system. This handles arbitrarily large exponents without panicking.
65///
66/// # Panics
67///
68/// Panics if the input string is malformed (e.g., "1e-" with no exponent value, or non-numeric
69/// exponents like "1e-abc").
70#[must_use]
71#[allow(clippy::cast_possible_truncation)]
72pub fn precision_from_str(s: &str) -> u8 {
73    let s = s.trim().to_ascii_lowercase();
74
75    // Check for scientific notation
76    if s.contains("e-") {
77        let exponent_str = s
78            .split("e-")
79            .nth(1)
80            .expect("Invalid scientific notation format: missing exponent after 'e-'");
81
82        return parse_scientific_exponent(exponent_str, true)
83            .expect("parse_scientific_exponent should return Some in strict mode");
84    }
85
86    // Check for decimal precision
87    if let Some((_, decimal_part)) = s.split_once('.') {
88        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
89    } else {
90        0
91    }
92}
93
94/// Returns the minimum increment precision inferred from the given string,
95/// ignoring trailing zeros.
96///
97/// For scientific notation with large negative exponents (e.g., "1e-300"), the precision
98/// is clamped to `u8::MAX` (255) to match the behavior of `precision_from_str`.
99#[must_use]
100#[allow(clippy::cast_possible_truncation)]
101pub fn min_increment_precision_from_str(s: &str) -> u8 {
102    let s = s.trim().to_ascii_lowercase();
103
104    // Check for scientific notation
105    if let Some(pos) = s.find('e')
106        && s[pos + 1..].starts_with('-')
107    {
108        let exponent_str = &s[pos + 2..];
109        // Use lenient parsing (returns 0 for invalid, doesn't panic)
110        return parse_scientific_exponent(exponent_str, false).unwrap_or(0);
111    }
112
113    // Check for decimal precision
114    if let Some(dot_pos) = s.find('.') {
115        let decimal_part = &s[dot_pos + 1..];
116        if decimal_part.chars().any(|c| c != '0') {
117            let trimmed_len = decimal_part.trim_end_matches('0').len();
118            return clamp_precision_with_log(trimmed_len, "Minimum increment", &s);
119        }
120        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
121    } else {
122        0
123    }
124}
125
126/// Returns a `usize` from the given bytes.
127///
128/// # Errors
129///
130/// Returns an error if there are not enough bytes to represent a `usize`.
131pub fn bytes_to_usize(bytes: &[u8]) -> anyhow::Result<usize> {
132    // Check bytes width
133    if bytes.len() >= std::mem::size_of::<usize>() {
134        let mut buffer = [0u8; std::mem::size_of::<usize>()];
135        buffer.copy_from_slice(&bytes[..std::mem::size_of::<usize>()]);
136
137        Ok(usize::from_le_bytes(buffer))
138    } else {
139        anyhow::bail!("Not enough bytes to represent a `usize`");
140    }
141}
142
143////////////////////////////////////////////////////////////////////////////////
144// Tests
145////////////////////////////////////////////////////////////////////////////////
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Error text expected from `bytes_to_usize` when the input is too short.
    const SHORT_INPUT_MSG: &str = "Not enough bytes to represent a `usize`";

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    fn test_precision_from_str(#[case] input: &str, #[case] expected: u8) {
        assert_eq!(precision_from_str(input), expected);
    }

    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    #[case("1.010", 2)]
    #[case("1.00100", 3)]
    #[case("0.0001000", 4)]
    #[case("1.000000000", 9)]
    fn test_min_increment_precision_from_str(#[case] input: &str, #[case] expected: u8) {
        assert_eq!(min_increment_precision_from_str(input), expected);
    }

    #[rstest]
    fn test_bytes_to_usize_empty() {
        // No bytes at all cannot form a `usize`
        let err = bytes_to_usize(&[]).unwrap_err();
        assert_eq!(err.to_string(), SHORT_INPUT_MSG);
    }

    #[rstest]
    fn test_bytes_to_usize_invalid() {
        // Three bytes are fewer than the width of a `usize`
        let err = bytes_to_usize(&[0x01, 0x02, 0x03]).unwrap_err();
        assert_eq!(err.to_string(), SHORT_INPUT_MSG);
    }

    #[rstest]
    fn test_bytes_to_usize_valid() {
        // Little-endian: the first byte is the least significant
        let payload = [0x01_u8, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let value = bytes_to_usize(&payload).unwrap();
        assert_eq!(value, 0x0807_0605_0403_0201);
        assert_eq!(value, 578_437_695_752_307_201);
    }

    #[rstest]
    fn test_precision_from_str_large_exponent_clamped() {
        // 999 exceeds u8::MAX (255) and must saturate
        assert_eq!(precision_from_str("1e-999"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_very_large_exponent_clamped() {
        // Exponents well past u8::MAX saturate as well
        assert_eq!(precision_from_str("1e-300"), u8::MAX);
        assert_eq!(precision_from_str("1e-1000000"), u8::MAX);
    }

    #[rstest]
    #[should_panic(expected = "Invalid scientific notation exponent")]
    fn test_precision_from_str_invalid_exponent_not_numeric() {
        let _ = precision_from_str("1e-abc");
    }

    #[rstest]
    #[should_panic(expected = "missing exponent after 'e-'")]
    fn test_precision_from_str_malformed_scientific_notation() {
        // An empty exponent is malformed input and must fail fast
        let _ = precision_from_str("1e-");
    }

    #[rstest]
    fn test_precision_from_str_edge_case_max_u8() {
        // Exactly u8::MAX is representable without clamping
        assert_eq!(precision_from_str("1e-255"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_just_above_max_u8() {
        // One past u8::MAX saturates to 255
        assert_eq!(precision_from_str("1e-256"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_u32_overflow() {
        // 4294967296 does not fit in a u32; the result still saturates to 255
        assert_eq!(precision_from_str("1e-4294967296"), u8::MAX);
    }

    #[rstest]
    fn test_precision_from_str_u64_overflow() {
        // An exponent beyond u64::MAX saturates to 255 instead of failing to parse
        assert_eq!(precision_from_str("1e-99999999999999999999"), u8::MAX);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_large_exponent() {
        // Large exponents saturate to u8::MAX (255) rather than falling back to 0
        assert_eq!(min_increment_precision_from_str("1e-300"), u8::MAX);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_very_large_exponent() {
        // An exponent beyond u64::MAX saturates to 255 here too
        assert_eq!(
            min_increment_precision_from_str("1e-99999999999999999999"),
            u8::MAX
        );
    }

    #[rstest]
    fn test_min_increment_precision_from_str_consistency() {
        // Both functions must agree on large exponents
        let input = "1e-1000";
        let precision = precision_from_str(input);
        assert_eq!(precision, min_increment_precision_from_str(input));
        assert_eq!(precision, u8::MAX);
    }
}