nautilus_model/orderbook/
aggregation.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2026 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Functions related to normalizing and processing top-of-book events.
17
18use crate::{
19    data::order::BookOrder,
20    enums::{BookType, RecordFlag},
21};
22
23/// Generates a stable order ID from a price value.
24///
25/// # High-Precision Safety
26///
27/// Under the `high-precision` feature, `PriceRaw` is `i128` (up to ~1.7e29).
28/// Casting to `u64` would truncate the upper bits, causing distinct prices to
29/// collide on the same synthetic order_id, breaking L2/MBP aggregation.
30///
31/// This function uses deterministic AHash to compress i128 into u64:
32/// - **Deterministic**: Fixed seeds (0,0,0,0) ensure the same price always maps to the same order_id.
33/// - **Collision-resistant**: AHash provides high-quality 1-in-2^64 collision probability.
34/// - **Correct**: No structural weaknesses; handles all i128 values uniformly.
35/// - **Fast**: AHash is optimized for performance while maintaining hash quality.
36///
37/// # Collision Characteristics
38///
39/// By the pigeonhole principle, any i128→u64 mapping must have theoretical collisions.
40/// However, AHash with fixed seeds ensures:
41/// - Truly random 1-in-2^64 collision probability (no systematic patterns).
42/// - For realistic orderbooks with ~1000 price levels: collision probability < 10^-15.
43/// - No structural weaknesses at edge cases.
44///
45/// Order-book correctness is binary, so we use a high-quality deterministic hash to
46/// push collision probability effectively to zero at negligible performance cost.
47#[inline]
48fn price_to_order_id(price_raw: i128) -> u64 {
49    let build_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0);
50    build_hasher.hash_one(price_raw)
51}
52
53/// Returns a price-based order ID for MBP aggregation.
54#[inline]
55fn price_based_order_id(order: &BookOrder) -> u64 {
56    #[cfg(feature = "high-precision")]
57    {
58        price_to_order_id(order.price.raw)
59    }
60    #[cfg(not(feature = "high-precision"))]
61    {
62        price_to_order_id(order.price.raw as i128)
63    }
64}
65
66pub(crate) fn pre_process_order(book_type: BookType, mut order: BookOrder, flags: u8) -> BookOrder {
67    match book_type {
68        BookType::L1_MBP => order.order_id = order.side as u64,
69        BookType::L2_MBP => order.order_id = price_based_order_id(&order),
70        BookType::L3_MBO => {
71            if RecordFlag::F_TOB.matches(flags) {
72                order.order_id = order.side as u64;
73            } else if RecordFlag::F_MBP.matches(flags) {
74                order.order_id = price_based_order_id(&order);
75            }
76        }
77    };
78    order
79}
80
81#[cfg(test)]
82mod tests {
83    use std::sync::{LazyLock, Mutex};
84
85    use ahash::AHashSet;
86    use nautilus_core::MUTEX_POISONED;
87    use rstest::rstest;
88
89    use super::*;
90
91    #[rstest]
92    fn test_price_to_order_id_deterministic() {
93        let price1 = 123456789012345678901234567890_i128;
94        let price2 = 987654321098765432109876543210_i128;
95
96        // Same price should always produce same order_id
97        let id1_a = price_to_order_id(price1);
98        let id1_b = price_to_order_id(price1);
99        assert_eq!(id1_a, id1_b, "Same price must produce same order_id");
100
101        // Different prices should produce different order_ids
102        let id2 = price_to_order_id(price2);
103        assert_ne!(
104            id1_a, id2,
105            "Different prices should produce different order_ids"
106        );
107
108        // Verify determinism across multiple calls
109        for _ in 0..100 {
110            assert_eq!(price_to_order_id(price1), id1_a);
111            assert_eq!(price_to_order_id(price2), id2);
112        }
113    }
114
115    #[rstest]
116    fn test_price_to_order_id_no_collisions() {
117        // Test that similar prices don't collide
118        let base = 1000000000_i128;
119        let mut seen = AHashSet::new();
120
121        for i in 0..1000 {
122            let price = base + i;
123            let id = price_to_order_id(price);
124            assert!(seen.insert(id), "Collision detected for price {price}");
125        }
126    }
127
128    #[rstest]
129    fn test_price_to_order_id_no_collision_across_64bit_boundary() {
130        // Test the specific collision case: price_raw = 1 vs price_raw = 1 << 64
131        let price1 = 1_i128;
132        let price2 = 1_i128 << 64; // This is 2^64
133
134        let id1 = price_to_order_id(price1);
135        let id2 = price_to_order_id(price2);
136
137        assert_ne!(
138            id1, id2,
139            "Collision detected: price 1 and price 2^64 must have different order_ids"
140        );
141    }
142
143    #[rstest]
144    fn test_price_to_order_id_handles_negative_prices() {
145        let mut seen = AHashSet::new();
146
147        // Test negative prices including edge case of -2
148        let negative_prices = vec![
149            -1_i128,
150            -2_i128,
151            -100_i128,
152            -1000000000_i128,
153            i128::MIN,
154            i128::MIN + 1,
155        ];
156
157        for &price in &negative_prices {
158            let id = price_to_order_id(price);
159            assert!(
160                seen.insert(id),
161                "Collision detected for negative price {price}"
162            );
163        }
164
165        // Also verify negative prices don't collide with positive ones
166        let positive_prices = vec![1_i128, 2_i128, 100_i128, 1000000000_i128, i128::MAX];
167
168        for &price in &positive_prices {
169            let id = price_to_order_id(price);
170            assert!(
171                seen.insert(id),
172                "Collision detected between negative and positive price: {price}"
173            );
174        }
175    }
176
177    #[rstest]
178    fn test_price_to_order_id_handles_large_values() {
179        let mut seen = AHashSet::new();
180
181        // Test values that exceed u64::MAX
182        // Note: (u64::MAX + 1) and (1 << 64) are the same value (2^64)
183        let large_values = vec![
184            u64::MAX as i128, // 2^64 - 1
185            1_i128 << 64,     // 2^64 (same as u64::MAX + 1)
186            (u64::MAX as i128) + 1000,
187            1_i128 << 65,  // 2^65
188            1_i128 << 100, // 2^100
189            i128::MAX - 1,
190            i128::MAX,
191        ];
192
193        for &price in &large_values {
194            let id = price_to_order_id(price);
195            assert!(
196                seen.insert(id),
197                "Collision detected for large price value {price}"
198            );
199        }
200    }
201
202    #[rstest]
203    fn test_price_to_order_id_multiples_of_2_pow_64() {
204        let mut seen = AHashSet::new();
205
206        // Test that multiples of 2^64 don't collide
207        // These would all collapse to the same value with naive XOR folding
208        for i in 0..10 {
209            let price = i * (1_i128 << 64);
210            let id = price_to_order_id(price);
211            assert!(
212                seen.insert(id),
213                "Collision detected for price {price} (multiple of 2^64)"
214            );
215        }
216    }
217
218    #[rstest]
219    fn test_price_to_order_id_realistic_orderbook_prices() {
220        let mut seen = AHashSet::new();
221
222        // Test realistic order book scenarios with fixed precision (9 decimals)
223        // BTCUSD at ~$50,000 with 9 decimal precision
224        let btc_base = 50000_000000000_i128;
225        for i in -1000..1000 {
226            let price = btc_base + i; // Prices from $49,999 to $50,001
227            let id = price_to_order_id(price);
228            assert!(
229                seen.insert(id),
230                "Collision detected for BTC price offset {i}"
231            );
232        }
233
234        // EURUSD at ~1.1000 with 9 decimal precision
235        let forex_base = 1_100000000_i128;
236        for i in -10000..10000 {
237            let price = forex_base + i; // Tight spreads
238            let id = price_to_order_id(price);
239            assert!(
240                seen.insert(id),
241                "Collision detected for EURUSD price offset {i}"
242            );
243        }
244
245        // Crypto with high precision (e.g., DOGEUSDT at $0.10)
246        let doge_base = 100000000_i128; // $0.10 with 9 decimals
247        for i in -100000..100000 {
248            let price = doge_base + i;
249            let id = price_to_order_id(price);
250            assert!(
251                seen.insert(id),
252                "Collision detected for DOGE price offset {i}"
253            );
254        }
255    }
256
257    #[rstest]
258    fn test_price_to_order_id_edge_case_patterns() {
259        let mut seen = AHashSet::new();
260
261        // Test powers of 2 (common in binary representations)
262        // Note: 1 << 127 produces i128::MIN (sign bit set), so this covers both positive and negative extremes
263        for power in 0..128 {
264            let price = 1_i128 << power;
265            let id = price_to_order_id(price);
266            assert!(
267                seen.insert(id),
268                "Collision detected for 2^{power} = {price}"
269            );
270        }
271
272        // Test negative powers of 2
273        // We stop at 126 because -(1 << 127) would overflow (can't negate i128::MIN)
274        for power in 0..127 {
275            let price = -(1_i128 << power);
276            let id = price_to_order_id(price);
277            assert!(
278                seen.insert(id),
279                "Collision detected for -2^{power} = {price}"
280            );
281        }
282    }
283
284    #[rstest]
285    fn test_price_to_order_id_sequential_negative_values() {
286        let mut seen = AHashSet::new();
287
288        // Test sequential negative values (important for spread instruments)
289        for i in -10000..=0 {
290            let price = i as i128;
291            let id = price_to_order_id(price);
292            assert!(seen.insert(id), "Collision detected for price {i}");
293        }
294    }
295
296    #[rstest]
297    #[case::max(i128::MAX)]
298    #[case::max_minus_1(i128::MAX - 1)]
299    #[case::min(i128::MIN)]
300    #[case::min_plus_1(i128::MIN + 1)]
301    #[case::u64_max(u64::MAX as i128)]
302    #[case::u64_max_minus_1((u64::MAX as i128) - 1)]
303    #[case::u64_max_plus_1((u64::MAX as i128) + 1)]
304    #[case::neg_u64_max(-(u64::MAX as i128))]
305    #[case::neg_u64_max_minus_1(-(u64::MAX as i128) - 1)]
306    #[case::neg_u64_max_plus_1(-(u64::MAX as i128) + 1)]
307    #[case::zero(0_i128)]
308    #[case::one(1_i128)]
309    #[case::neg_one(-1_i128)]
310    fn test_price_to_order_id_extreme_values_no_collision(#[case] price: i128) {
311        // Each test case runs independently and checks that its price
312        // produces a unique order_id by storing in a static set
313        static SEEN: LazyLock<Mutex<AHashSet<u64>>> = LazyLock::new(|| Mutex::new(AHashSet::new()));
314
315        let id = price_to_order_id(price);
316        let mut seen = SEEN.lock().expect(MUTEX_POISONED);
317        assert!(
318            seen.insert(id),
319            "Collision detected for extreme value: {price} (order_id: {id})"
320        );
321    }
322
323    #[rstest]
324    fn test_price_to_order_id_avalanche_effect() {
325        // Test that small changes in price produce large changes in hash
326        // (avalanche property)
327        let base_price = 1000000000000_i128;
328        let id1 = price_to_order_id(base_price);
329        let id2 = price_to_order_id(base_price + 1);
330
331        // Count differing bits
332        let xor = id1 ^ id2;
333        let differing_bits = xor.count_ones();
334
335        // With good avalanche, ~50% of bits should differ for a 1-bit input change
336        // We'll be lenient and require at least 20% (12 out of 64 bits)
337        assert!(
338            differing_bits >= 12,
339            "Poor avalanche: only {differing_bits}/64 bits differ for adjacent prices"
340        );
341    }
342
343    #[rstest]
344    fn test_price_to_order_id_comprehensive_collision_check() {
345        const TOTAL_TESTS: usize = 500_000;
346
347        // Comprehensive test combining all edge cases
348        let mut seen = AHashSet::new();
349        let mut collision_count = 0;
350
351        // Test 1: Dense range around zero
352        for i in -100_000..100_000 {
353            let id = price_to_order_id(i as i128);
354            if !seen.insert(id) {
355                collision_count += 1;
356            }
357        }
358
359        // Test 2: Powers and near-powers of 2
360        for power in 0..64 {
361            for offset in -10..=10 {
362                let price = (1_i128 << power) + offset;
363                let id = price_to_order_id(price);
364                if !seen.insert(id) {
365                    collision_count += 1;
366                }
367            }
368        }
369
370        // Test 3: Realistic price levels
371        for base in [100, 1000, 10000, 100000, 1000000, 10000000] {
372            for i in 0..1000 {
373                let price = base * 1_000_000_000_i128 + i;
374                let id = price_to_order_id(price);
375                if !seen.insert(id) {
376                    collision_count += 1;
377                }
378            }
379        }
380
381        // Calculate collision rate
382        let collision_rate = collision_count as f64 / TOTAL_TESTS as f64;
383
384        // For a good 128→64 bit hash, collision rate should be negligible in realistic scenarios
385        // This test uses pathological patterns (500k consecutive integers, powers of 2, etc.)
386        // AHash provides truly random collision distribution with ~0.0007% rate for such dense patterns
387        // Real orderbooks are sparse (~1000 levels) with collision probability < 10^-15
388        assert!(
389            collision_rate < 0.001,
390            "High collision rate: {collision_rate:.6}% ({collision_count}/{TOTAL_TESTS})"
391        );
392
393        println!(
394            "✓ Tested {} unique prices, {} collisions ({:.6}%)",
395            TOTAL_TESTS,
396            collision_count,
397            collision_rate * 100.0
398        );
399    }
400}