nautilus_network/
backoff.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Provides an implementation of an exponential backoff mechanism with jitter support.
17//! It is used for managing reconnection delays in the socket clients.
18//!
19//! The backoff mechanism allows the delay to grow exponentially up to a configurable
20//! maximum, optionally applying random jitter to avoid synchronized reconnection storms.
21//! An "immediate first" flag is available so that the very first reconnect attempt
22//! can occur without any delay.
23
24use std::time::Duration;
25
26use nautilus_core::correctness::{check_in_range_inclusive_f64, check_predicate_true};
27use rand::Rng;
28
29#[derive(Clone, Debug)]
30pub struct ExponentialBackoff {
31    /// The initial backoff delay.
32    delay_initial: Duration,
33    /// The maximum delay to cap the backoff.
34    delay_max: Duration,
35    /// The current backoff delay.
36    delay_current: Duration,
37    /// The factor to multiply the delay on each iteration.
38    factor: f64,
39    /// The maximum random jitter to add (in milliseconds).
40    jitter_ms: u64,
41    /// If true, the first call to `next()` returns zero delay (immediate reconnect).
42    immediate_reconnect: bool,
43    /// The original value of `immediate_reconnect` for reset purposes.
44    immediate_reconnect_original: bool,
45}
46
47/// An exponential backoff mechanism with optional jitter and immediate-first behavior.
48///
49/// This struct computes successive delays for reconnect attempts.
50/// It starts from an initial delay and multiplies it by a factor on each iteration,
51/// capping the delay at a maximum value. Random jitter is added (up to a configured
52/// maximum) to the delay. When `immediate_first` is true, the first call to `next_duration`
53/// returns zero delay, triggering an immediate reconnect, after which the immediate flag is disabled.
54impl ExponentialBackoff {
55    /// Creates a new [`ExponentialBackoff]` instance.
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if:
60    /// - `delay_initial` is zero.
61    /// - `delay_max` is less than `delay_initial`.
62    /// - `delay_max` exceeds `Duration::from_nanos(u64::MAX)` (≈584 years).
63    /// - `factor` is not in the range [1.0, 100.0] (to prevent reconnect spam).
64    pub fn new(
65        delay_initial: Duration,
66        delay_max: Duration,
67        factor: f64,
68        jitter_ms: u64,
69        immediate_first: bool,
70    ) -> anyhow::Result<Self> {
71        check_predicate_true(!delay_initial.is_zero(), "delay_initial must be non-zero")?;
72        check_predicate_true(
73            delay_max >= delay_initial,
74            "delay_max must be >= delay_initial",
75        )?;
76        check_predicate_true(
77            delay_max.as_nanos() <= u128::from(u64::MAX),
78            "delay_max exceeds maximum representable duration (≈584 years)",
79        )?;
80        check_in_range_inclusive_f64(factor, 1.0, 100.0, "factor")?;
81
82        Ok(Self {
83            delay_initial,
84            delay_max,
85            delay_current: delay_initial,
86            factor,
87            jitter_ms,
88            immediate_reconnect: immediate_first,
89            immediate_reconnect_original: immediate_first,
90        })
91    }
92
93    /// Return the next backoff delay with jitter and update the internal state.
94    ///
95    /// If the `immediate_first` flag is set and this is the first call (i.e. the current
96    /// delay equals the initial delay), it returns `Duration::ZERO` to trigger an immediate
97    /// reconnect and disables the immediate behavior for subsequent calls.
98    pub fn next_duration(&mut self) -> Duration {
99        if self.immediate_reconnect && self.delay_current == self.delay_initial {
100            self.immediate_reconnect = false;
101            return Duration::ZERO;
102        }
103
104        // Generate random jitter
105        let jitter = rand::rng().random_range(0..=self.jitter_ms);
106        let delay_with_jitter = self.delay_current + Duration::from_millis(jitter);
107
108        // Clamp the returned delay to never exceed delay_max
109        let clamped_delay = std::cmp::min(delay_with_jitter, self.delay_max);
110
111        // Prepare the next delay with overflow protection
112        // Keep all math in u128 to avoid silent truncation
113        let current_nanos = self.delay_current.as_nanos();
114        let max_nanos = self.delay_max.as_nanos();
115
116        // Use checked floating point multiplication to prevent overflow
117        let next_nanos_u128 = if current_nanos > u128::from(u64::MAX) {
118            // Current is already at max representable value, cap to max
119            max_nanos
120        } else {
121            let current_u64 = current_nanos as u64;
122            let next_f64 = current_u64 as f64 * self.factor;
123
124            // Check for overflow in the float result
125            if next_f64 > u64::MAX as f64 {
126                u128::from(u64::MAX)
127            } else {
128                u128::from(next_f64 as u64)
129            }
130        };
131
132        let clamped = std::cmp::min(next_nanos_u128, max_nanos);
133        let final_nanos = if clamped > u128::from(u64::MAX) {
134            u64::MAX
135        } else {
136            clamped as u64
137        };
138
139        self.delay_current = Duration::from_nanos(final_nanos);
140
141        clamped_delay
142    }
143
144    /// Reset the backoff to its initial state.
145    pub const fn reset(&mut self) {
146        self.delay_current = self.delay_initial;
147        self.immediate_reconnect = self.immediate_reconnect_original;
148    }
149
150    /// Returns the current base delay without jitter.
151    /// This represents the delay that would be used as the base for the next call to `next()`,
152    /// before any jitter is applied.
153    #[must_use]
154    pub const fn current_delay(&self) -> Duration {
155        self.delay_current
156    }
157}
158
159////////////////////////////////////////////////////////////////////////////////
160// Tests
161////////////////////////////////////////////////////////////////////////////////
162#[cfg(test)]
163mod tests {
164    use std::time::Duration;
165
166    use rstest::rstest;
167
168    use super::*;
169
170    #[rstest]
171    fn test_no_jitter_exponential_growth() {
172        let initial = Duration::from_millis(100);
173        let max = Duration::from_millis(1600);
174        let factor = 2.0;
175        let jitter = 0;
176        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
177
178        // 1st call returns the initial delay
179        let d1 = backoff.next_duration();
180        assert_eq!(d1, Duration::from_millis(100));
181
182        // 2nd call: current becomes 200ms
183        let d2 = backoff.next_duration();
184        assert_eq!(d2, Duration::from_millis(200));
185
186        // 3rd call: current becomes 400ms
187        let d3 = backoff.next_duration();
188        assert_eq!(d3, Duration::from_millis(400));
189
190        // 4th call: current becomes 800ms
191        let d4 = backoff.next_duration();
192        assert_eq!(d4, Duration::from_millis(800));
193
194        // 5th call: current would be 1600ms (800 * 2) which is within the cap
195        let d5 = backoff.next_duration();
196        assert_eq!(d5, Duration::from_millis(1600));
197
198        // 6th call: should still be capped at 1600ms
199        let d6 = backoff.next_duration();
200        assert_eq!(d6, Duration::from_millis(1600));
201    }
202
203    #[rstest]
204    fn test_reset() {
205        let initial = Duration::from_millis(100);
206        let max = Duration::from_millis(1600);
207        let factor = 2.0;
208        let jitter = 0;
209        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
210
211        // Call next() once so that the internal state updates
212        let _ = backoff.next_duration(); // current_delay becomes 200ms
213        backoff.reset();
214        let d = backoff.next_duration();
215        // After reset, the next delay should be the initial delay (100ms)
216        assert_eq!(d, Duration::from_millis(100));
217    }
218
219    #[rstest]
220    fn test_jitter_within_bounds() {
221        let initial = Duration::from_millis(100);
222        let max = Duration::from_millis(1000);
223        let factor = 2.0;
224        let jitter = 50;
225        // Run several iterations to ensure that jitter stays within bounds
226        for _ in 0..10 {
227            let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
228            // Capture the expected base delay before jitter is applied
229            let base = backoff.delay_current;
230            let delay = backoff.next_duration();
231            // The returned delay must be at least the base delay and at most base + jitter
232            let min_expected = base;
233            let max_expected = base + Duration::from_millis(jitter);
234            assert!(
235                delay >= min_expected,
236                "Delay {delay:?} is less than expected minimum {min_expected:?}"
237            );
238            assert!(
239                delay <= max_expected,
240                "Delay {delay:?} exceeds expected maximum {max_expected:?}"
241            );
242        }
243    }
244
245    #[rstest]
246    fn test_factor_less_than_two() {
247        let initial = Duration::from_millis(100);
248        let max = Duration::from_millis(200);
249        let factor = 1.5;
250        let jitter = 0;
251        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
252
253        // First call returns 100ms
254        let d1 = backoff.next_duration();
255        assert_eq!(d1, Duration::from_millis(100));
256
257        // Second call: current_delay becomes 100 * 1.5 = 150ms
258        let d2 = backoff.next_duration();
259        assert_eq!(d2, Duration::from_millis(150));
260
261        // Third call: current_delay becomes 150 * 1.5 = 225ms, but capped to 200ms
262        let d3 = backoff.next_duration();
263        assert_eq!(d3, Duration::from_millis(200));
264
265        // Fourth call: remains at the max of 200ms
266        let d4 = backoff.next_duration();
267        assert_eq!(d4, Duration::from_millis(200));
268    }
269
270    #[rstest]
271    fn test_max_delay_is_respected() {
272        let initial = Duration::from_millis(500);
273        let max = Duration::from_millis(1000);
274        let factor = 3.0;
275        let jitter = 0;
276        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
277
278        // 1st call returns 500ms
279        let d1 = backoff.next_duration();
280        assert_eq!(d1, Duration::from_millis(500));
281
282        // 2nd call: would be 500 * 3 = 1500ms but is capped to 1000ms
283        let d2 = backoff.next_duration();
284        assert_eq!(d2, Duration::from_millis(1000));
285
286        // Subsequent calls should continue to return the max delay
287        let d3 = backoff.next_duration();
288        assert_eq!(d3, Duration::from_millis(1000));
289    }
290
291    #[rstest]
292    fn test_current_delay_getter() {
293        let initial = Duration::from_millis(100);
294        let max = Duration::from_millis(1600);
295        let factor = 2.0;
296        let jitter = 0;
297        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
298
299        assert_eq!(backoff.current_delay(), initial);
300
301        let _ = backoff.next_duration();
302        assert_eq!(backoff.current_delay(), Duration::from_millis(200));
303
304        let _ = backoff.next_duration();
305        assert_eq!(backoff.current_delay(), Duration::from_millis(400));
306
307        backoff.reset();
308        assert_eq!(backoff.current_delay(), initial);
309    }
310
311    #[rstest]
312    fn test_validation_zero_initial_delay() {
313        let result =
314            ExponentialBackoff::new(Duration::ZERO, Duration::from_millis(1000), 2.0, 0, false);
315        assert!(result.is_err());
316        assert!(
317            result
318                .unwrap_err()
319                .to_string()
320                .contains("delay_initial must be non-zero")
321        );
322    }
323
324    #[rstest]
325    fn test_validation_max_less_than_initial() {
326        let result = ExponentialBackoff::new(
327            Duration::from_millis(1000),
328            Duration::from_millis(500),
329            2.0,
330            0,
331            false,
332        );
333        assert!(result.is_err());
334        assert!(
335            result
336                .unwrap_err()
337                .to_string()
338                .contains("delay_max must be >= delay_initial")
339        );
340    }
341
342    #[rstest]
343    fn test_validation_factor_too_small() {
344        let result = ExponentialBackoff::new(
345            Duration::from_millis(100),
346            Duration::from_millis(1000),
347            0.5,
348            0,
349            false,
350        );
351        assert!(result.is_err());
352        assert!(result.unwrap_err().to_string().contains("factor"));
353    }
354
355    #[rstest]
356    fn test_validation_factor_too_large() {
357        let result = ExponentialBackoff::new(
358            Duration::from_millis(100),
359            Duration::from_millis(1000),
360            150.0,
361            0,
362            false,
363        );
364        assert!(result.is_err());
365        assert!(result.unwrap_err().to_string().contains("factor"));
366    }
367
368    #[rstest]
369    fn test_validation_delay_max_exceeds_u64_max_nanos() {
370        // Duration::from_nanos(u64::MAX) is approximately 584 years
371        // Try to create a backoff with delay_max exceeding this
372        let max_valid = Duration::from_nanos(u64::MAX);
373        let too_large = max_valid + Duration::from_nanos(1);
374
375        let result = ExponentialBackoff::new(Duration::from_millis(100), too_large, 2.0, 0, false);
376        assert!(result.is_err());
377        assert!(
378            result
379                .unwrap_err()
380                .to_string()
381                .contains("delay_max exceeds maximum representable duration")
382        );
383    }
384
385    #[rstest]
386    fn test_immediate_first() {
387        let initial = Duration::from_millis(100);
388        let max = Duration::from_millis(1600);
389        let factor = 2.0;
390        let jitter = 0;
391        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, true).unwrap();
392
393        // The first call should yield an immediate (zero) delay
394        let d1 = backoff.next_duration();
395        assert_eq!(
396            d1,
397            Duration::ZERO,
398            "Expected immediate reconnect (zero delay) on first call"
399        );
400
401        // The next call should return the current delay (i.e. the base initial delay)
402        let d2 = backoff.next_duration();
403        assert_eq!(
404            d2, initial,
405            "Expected the delay to be the initial delay after immediate reconnect"
406        );
407
408        // Subsequent calls should continue with the exponential growth
409        let d3 = backoff.next_duration();
410        let expected = initial * 2; // 100ms * 2 = 200ms
411        assert_eq!(
412            d3, expected,
413            "Expected exponential growth from the initial delay"
414        );
415    }
416
417    #[rstest]
418    fn test_reset_restores_immediate_first() {
419        let initial = Duration::from_millis(100);
420        let max = Duration::from_millis(1600);
421        let factor = 2.0;
422        let jitter = 0;
423        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, true).unwrap();
424
425        // Use immediate first
426        let d1 = backoff.next_duration();
427        assert_eq!(d1, Duration::ZERO);
428
429        // Now immediate_first should be disabled
430        let d2 = backoff.next_duration();
431        assert_eq!(d2, initial);
432
433        // Reset should restore immediate_first
434        backoff.reset();
435        let d3 = backoff.next_duration();
436        assert_eq!(
437            d3,
438            Duration::ZERO,
439            "Reset should restore immediate_first behavior"
440        );
441    }
442
443    #[rstest]
444    fn test_jitter_never_exceeds_max_delay() {
445        let initial = Duration::from_millis(100);
446        let max = Duration::from_millis(1000);
447        let factor = 2.0;
448        let jitter = 500;
449
450        let mut backoff = ExponentialBackoff::new(initial, max, factor, jitter, false).unwrap();
451
452        // Run backoff until it reaches the cap
453        while backoff.current_delay() < max {
454            backoff.next_duration();
455        }
456
457        // Now that we're at the cap, verify jitter doesn't push us over delay_max
458        for _ in 0..100 {
459            let delay = backoff.next_duration();
460            assert!(
461                delay <= max,
462                "Delay with jitter {delay:?} exceeded max {max:?}"
463            );
464        }
465    }
466}