Skip to main content

nautilus_common/msgbus/
matching.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2026 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Pattern matching for message bus topic subscriptions.
17
18use super::mstr::{MStr, Pattern, Topic};
19
20/// Match a topic against a pattern with wildcard support.
21///
22/// Wildcards:
23/// - `*` matches zero or more characters
24/// - `?` matches exactly one character
25///
26/// Uses a greedy two-pointer algorithm. Chosen over DP (O(n*m) space),
27/// recursive backtracking (stack overflow risk), and regex (compilation overhead).
28pub fn is_matching_backtracking(topic: MStr<Topic>, pattern: MStr<Pattern>) -> bool {
29    is_matching(topic.as_bytes(), pattern.as_bytes())
30}
31
32/// Match topic bytes against pattern bytes.
33#[must_use]
34#[inline]
35pub fn is_matching(topic: &[u8], pattern: &[u8]) -> bool {
36    // Fast path for exact matches (no wildcards)
37    if topic.len() == pattern.len() && !pattern.contains(&b'*') && !pattern.contains(&b'?') {
38        return topic == pattern;
39    }
40
41    is_matching_greedy(topic, pattern)
42}
43
44/// Greedy wildcard matching. Tracks the last `*` position and backtracks
45/// when needed. O(n+m) for typical patterns, O(n*m) worst case.
46#[inline]
47fn is_matching_greedy(topic: &[u8], pattern: &[u8]) -> bool {
48    let mut i = 0;
49    let mut j = 0;
50    let mut star_idx: Option<usize> = None;
51    let mut match_idx = 0;
52
53    while i < topic.len() {
54        if j < pattern.len() && (pattern[j] == b'?' || pattern[j] == topic[i]) {
55            i += 1;
56            j += 1;
57        } else if j < pattern.len() && pattern[j] == b'*' {
58            star_idx = Some(j);
59            match_idx = i;
60            j += 1;
61        } else if let Some(si) = star_idx {
62            // Backtrack: try matching one more char with the last '*'
63            j = si + 1;
64            match_idx += 1;
65            i = match_idx;
66        } else {
67            return false;
68        }
69    }
70
71    // Skip trailing '*' in pattern
72    while j < pattern.len() && pattern[j] == b'*' {
73        j += 1;
74    }
75
76    j == pattern.len()
77}
78
79#[cfg(test)]
80mod tests {
81    use rand::{RngExt, SeedableRng, rngs::StdRng};
82    use regex::Regex;
83    use rstest::rstest;
84
85    use super::*;
86
87    #[rstest]
88    #[case("a", "*", true)]
89    #[case("a", "a", true)]
90    #[case("a", "b", false)]
91    #[case("data.quotes.BINANCE", "data.*", true)]
92    #[case("data.quotes.BINANCE", "data.quotes*", true)]
93    #[case("data.quotes.BINANCE", "data.*.BINANCE", true)]
94    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.*", true)]
95    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.ETH*", true)]
96    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.ETH???", false)]
97    #[case("data.trades.BINANCE.ETHUSD", "data.*.BINANCE.ETH???", true)]
98    // We don't support [seq] style pattern
99    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.ET[HC]USDT", false)]
100    // We don't support [!seq] style pattern
101    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.ET[!ABC]USDT", false)]
102    // We don't support [^seq] style pattern
103    #[case("data.trades.BINANCE.ETHUSDT", "data.*.BINANCE.ET[^ABC]USDT", false)]
104    fn test_is_matching(#[case] topic: &str, #[case] pattern: &str, #[case] expected: bool) {
105        assert_eq!(
106            is_matching_backtracking(topic.into(), pattern.into()),
107            expected
108        );
109    }
110
111    #[rstest]
112    // Empty and edge cases
113    #[case(b"", b"", true)]
114    #[case(b"", b"*", true)]
115    #[case(b"", b"?", false)]
116    #[case(b"", b"a", false)]
117    #[case(b"a", b"", false)]
118    // Wildcard-only patterns
119    #[case(b"abc", b"*", true)]
120    #[case(b"abc", b"***", true)]
121    #[case(b"abc", b"???", true)]
122    #[case(b"abc", b"????", false)]
123    #[case(b"abc", b"??", false)]
124    // Consecutive stars
125    #[case(b"abc", b"a**c", true)]
126    #[case(b"abc", b"**c", true)]
127    #[case(b"abc", b"a**", true)]
128    // Mixed consecutive
129    #[case(b"abc", b"*?*", true)]
130    #[case(b"ab", b"*?*", true)]
131    #[case(b"a", b"*?*", true)]
132    #[case(b"", b"*?*", false)]
133    // Pattern longer than topic
134    #[case(b"ab", b"abc", false)]
135    #[case(b"ab", b"ab?", false)]
136    fn test_is_matching_bytes(
137        #[case] topic: &[u8],
138        #[case] pattern: &[u8],
139        #[case] expected: bool,
140    ) {
141        assert_eq!(is_matching(topic, pattern), expected);
142    }
143
144    fn convert_pattern_to_regex(pattern: &str) -> String {
145        let mut regex = String::new();
146        regex.push('^');
147
148        for c in pattern.chars() {
149            match c {
150                '.' => regex.push_str("\\."),
151                '*' => regex.push_str(".*"),
152                '?' => regex.push('.'),
153                _ => regex.push(c),
154            }
155        }
156
157        regex.push('$');
158        regex
159    }
160
161    #[rstest]
162    #[case("a??.quo*es.?I?AN*ET?US*T", "^a..\\.quo.*es\\..I.AN.*ET.US.*T$")]
163    #[case("da?*.?u*?s??*NC**ETH?", "^da..*\\..u.*.s...*NC.*.*ETH.$")]
164    fn test_convert_pattern_to_regex(#[case] pat: &str, #[case] regex: &str) {
165        assert_eq!(convert_pattern_to_regex(pat), regex);
166    }
167
168    fn generate_pattern_from_topic(topic: &str, rng: &mut StdRng) -> String {
169        let mut pattern = String::new();
170
171        for c in topic.chars() {
172            let val: f64 = rng.random();
173            // 10% chance of wildcard
174            if val < 0.1 {
175                pattern.push('*');
176            }
177            // 20% chance of question mark
178            else if val < 0.3 {
179                pattern.push('?');
180            }
181            // 20% chance of skipping
182            else if val < 0.5 {
183                continue;
184            }
185            // 50% chance of keeping the character
186            else {
187                pattern.push(c);
188            };
189        }
190
191        pattern
192    }
193
194    #[rstest]
195    fn test_matching_backtracking() {
196        let topic = "data.quotes.BINANCE.ETHUSDT";
197        let mut rng = StdRng::seed_from_u64(42);
198
199        for i in 0..1000 {
200            let pattern = generate_pattern_from_topic(topic, &mut rng);
201            let regex_pattern = convert_pattern_to_regex(&pattern);
202            let regex = Regex::new(&regex_pattern).unwrap();
203            assert_eq!(
204                is_matching_backtracking(topic.into(), pattern.as_str().into()),
205                regex.is_match(topic),
206                "Failed to match on iteration: {i}, pattern: \"{pattern}\", topic: {topic}, regex: \"{regex_pattern}\""
207            );
208        }
209    }
210}