nautilus_databento/python/
loader.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Python bindings for the Databento data loader.
17
18use std::{collections::HashMap, path::PathBuf};
19
20use databento::dbn;
21use nautilus_core::{
22    ffi::cvec::CVec,
23    python::{IntoPyObjectNautilusExt, to_pyvalue_err},
24};
25use nautilus_model::{
26    data::{Bar, Data, InstrumentStatus, OrderBookDelta, OrderBookDepth10, QuoteTick, TradeTick},
27    identifiers::{InstrumentId, Venue},
28    python::instruments::instrument_any_to_pyobject,
29};
30use pyo3::{
31    prelude::*,
32    types::{PyCapsule, PyList},
33};
34use ustr::Ustr;
35
36use crate::{
37    loader::DatabentoDataLoader,
38    types::{DatabentoImbalance, DatabentoPublisher, DatabentoStatistics, PublisherId},
39};
40
41#[pymethods]
42impl DatabentoDataLoader {
43    #[new]
44    #[pyo3(signature = (publishers_filepath=None))]
45    fn py_new(publishers_filepath: Option<PathBuf>) -> PyResult<Self> {
46        Self::new(publishers_filepath).map_err(to_pyvalue_err)
47    }
48
49    #[pyo3(name = "load_publishers")]
50    fn py_load_publishers(&mut self, publishers_filepath: PathBuf) -> PyResult<()> {
51        self.load_publishers(publishers_filepath)
52            .map_err(to_pyvalue_err)
53    }
54
55    #[must_use]
56    #[pyo3(name = "get_publishers")]
57    fn py_get_publishers(&self) -> HashMap<u16, DatabentoPublisher> {
58        self.get_publishers()
59            .iter()
60            .map(|(&key, value)| (key, value.clone()))
61            .collect::<HashMap<u16, DatabentoPublisher>>()
62    }
63
64    #[pyo3(name = "set_dataset_for_venue")]
65    fn py_set_dataset_for_venue(&mut self, dataset: String, venue: Venue) {
66        self.set_dataset_for_venue(Ustr::from(&dataset), venue);
67    }
68
69    #[must_use]
70    #[pyo3(name = "get_dataset_for_venue")]
71    fn py_get_dataset_for_venue(&self, venue: &Venue) -> Option<String> {
72        self.get_dataset_for_venue(venue).map(ToString::to_string)
73    }
74
75    #[must_use]
76    #[pyo3(name = "get_venue_for_publisher")]
77    fn py_get_venue_for_publisher(&self, publisher_id: PublisherId) -> Option<String> {
78        self.get_venue_for_publisher(publisher_id)
79            .map(ToString::to_string)
80    }
81
82    #[pyo3(name = "schema_for_file")]
83    fn py_schema_for_file(&self, filepath: PathBuf) -> PyResult<Option<String>> {
84        self.schema_from_file(&filepath).map_err(to_pyvalue_err)
85    }
86
87    #[pyo3(name = "load_instruments")]
88    fn py_load_instruments(
89        &mut self,
90        py: Python,
91        filepath: PathBuf,
92        use_exchange_as_venue: bool,
93    ) -> PyResult<PyObject> {
94        let iter = self
95            .load_instruments(&filepath, use_exchange_as_venue)
96            .map_err(to_pyvalue_err)?;
97
98        let mut data = Vec::new();
99        for instrument in iter {
100            let py_object = instrument_any_to_pyobject(py, instrument)?;
101            data.push(py_object);
102        }
103
104        let list = PyList::new(py, &data).expect("Invalid `ExactSizeIterator`");
105
106        Ok(list.into_py_any_unwrap(py))
107    }
108
109    // Cannot include trades
110    #[pyo3(name = "load_order_book_deltas")]
111    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
112    fn py_load_order_book_deltas(
113        &self,
114        filepath: PathBuf,
115        instrument_id: Option<InstrumentId>,
116        price_precision: Option<u8>,
117    ) -> PyResult<Vec<OrderBookDelta>> {
118        self.load_order_book_deltas(&filepath, instrument_id, price_precision)
119            .map_err(to_pyvalue_err)
120    }
121
122    #[pyo3(name = "load_order_book_deltas_as_pycapsule")]
123    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, include_trades=None))]
124    fn py_load_order_book_deltas_as_pycapsule(
125        &self,
126        py: Python,
127        filepath: PathBuf,
128        instrument_id: Option<InstrumentId>,
129        price_precision: Option<u8>,
130        include_trades: Option<bool>,
131    ) -> PyResult<PyObject> {
132        let iter = self
133            .read_records::<dbn::MboMsg>(
134                &filepath,
135                instrument_id,
136                price_precision,
137                include_trades.unwrap_or(false),
138                None,
139            )
140            .map_err(to_pyvalue_err)?;
141
142        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
143    }
144
145    #[pyo3(name = "load_order_book_depth10")]
146    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
147    fn py_load_order_book_depth10(
148        &self,
149        filepath: PathBuf,
150        instrument_id: Option<InstrumentId>,
151        price_precision: Option<u8>,
152    ) -> PyResult<Vec<OrderBookDepth10>> {
153        self.load_order_book_depth10(&filepath, instrument_id, price_precision)
154            .map_err(to_pyvalue_err)
155    }
156
157    #[pyo3(name = "load_order_book_depth10_as_pycapsule")]
158    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
159    fn py_load_order_book_depth10_as_pycapsule(
160        &self,
161        py: Python,
162        filepath: PathBuf,
163        instrument_id: Option<InstrumentId>,
164        price_precision: Option<u8>,
165    ) -> PyResult<PyObject> {
166        let iter = self
167            .read_records::<dbn::Mbp10Msg>(&filepath, instrument_id, price_precision, false, None)
168            .map_err(to_pyvalue_err)?;
169
170        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
171    }
172
173    #[pyo3(name = "load_quotes")]
174    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
175    fn py_load_quotes(
176        &self,
177        filepath: PathBuf,
178        instrument_id: Option<InstrumentId>,
179        price_precision: Option<u8>,
180    ) -> PyResult<Vec<QuoteTick>> {
181        self.load_quotes(&filepath, instrument_id, price_precision)
182            .map_err(to_pyvalue_err)
183    }
184
185    #[pyo3(name = "load_quotes_as_pycapsule")]
186    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, include_trades=None))]
187    fn py_load_quotes_as_pycapsule(
188        &self,
189        py: Python,
190        filepath: PathBuf,
191        instrument_id: Option<InstrumentId>,
192        price_precision: Option<u8>,
193        include_trades: Option<bool>,
194    ) -> PyResult<PyObject> {
195        let iter = self
196            .read_records::<dbn::Mbp1Msg>(
197                &filepath,
198                instrument_id,
199                price_precision,
200                include_trades.unwrap_or(false),
201                None,
202            )
203            .map_err(to_pyvalue_err)?;
204
205        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
206    }
207
208    #[pyo3(name = "load_bbo_quotes")]
209    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
210    fn py_load_bbo_quotes(
211        &self,
212        filepath: PathBuf,
213        instrument_id: Option<InstrumentId>,
214        price_precision: Option<u8>,
215    ) -> PyResult<Vec<QuoteTick>> {
216        self.load_bbo_quotes(&filepath, instrument_id, price_precision)
217            .map_err(to_pyvalue_err)
218    }
219
220    #[pyo3(name = "load_bbo_quotes_as_pycapsule")]
221    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
222    fn py_load_bbo_quotes_as_pycapsule(
223        &self,
224        py: Python,
225        filepath: PathBuf,
226        instrument_id: Option<InstrumentId>,
227        price_precision: Option<u8>,
228    ) -> PyResult<PyObject> {
229        let iter = self
230            .read_records::<dbn::BboMsg>(&filepath, instrument_id, price_precision, false, None)
231            .map_err(to_pyvalue_err)?;
232
233        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
234    }
235
236    #[pyo3(name = "load_tbbo_trades")]
237    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
238    fn py_load_tbbo_trades(
239        &self,
240        filepath: PathBuf,
241        instrument_id: Option<InstrumentId>,
242        price_precision: Option<u8>,
243    ) -> PyResult<Vec<TradeTick>> {
244        self.load_tbbo_trades(&filepath, instrument_id, price_precision)
245            .map_err(to_pyvalue_err)
246    }
247
248    #[pyo3(name = "load_tbbo_trades_as_pycapsule")]
249    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
250    fn py_load_tbbo_trades_as_pycapsule(
251        &self,
252        py: Python,
253        filepath: PathBuf,
254        instrument_id: Option<InstrumentId>,
255        price_precision: Option<u8>,
256    ) -> PyResult<PyObject> {
257        let iter = self
258            .read_records::<dbn::TbboMsg>(&filepath, instrument_id, price_precision, false, None)
259            .map_err(to_pyvalue_err)?;
260
261        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
262    }
263
264    #[pyo3(name = "load_tcbbo_trades")]
265    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
266    fn py_load_tcbbo_trades(
267        &self,
268        filepath: PathBuf,
269        instrument_id: Option<InstrumentId>,
270        price_precision: Option<u8>,
271    ) -> PyResult<Vec<TradeTick>> {
272        self.load_tcbbo_trades(&filepath, instrument_id, price_precision)
273            .map_err(to_pyvalue_err)
274    }
275
276    #[pyo3(name = "load_tcbbo_trades_as_pycapsule")]
277    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
278    fn py_load_tcbbo_trades_as_pycapsule(
279        &self,
280        py: Python,
281        filepath: PathBuf,
282        instrument_id: Option<InstrumentId>,
283        price_precision: Option<u8>,
284    ) -> PyResult<PyObject> {
285        let iter = self
286            .read_records::<dbn::CbboMsg>(&filepath, instrument_id, price_precision, false, None)
287            .map_err(to_pyvalue_err)?;
288
289        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
290    }
291
292    #[pyo3(name = "load_trades")]
293    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
294    fn py_load_trades(
295        &self,
296        filepath: PathBuf,
297        instrument_id: Option<InstrumentId>,
298        price_precision: Option<u8>,
299    ) -> PyResult<Vec<TradeTick>> {
300        self.load_trades(&filepath, instrument_id, price_precision)
301            .map_err(to_pyvalue_err)
302    }
303
304    #[pyo3(name = "load_trades_as_pycapsule")]
305    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
306    fn py_load_trades_as_pycapsule(
307        &self,
308        py: Python,
309        filepath: PathBuf,
310        instrument_id: Option<InstrumentId>,
311        price_precision: Option<u8>,
312    ) -> PyResult<PyObject> {
313        let iter = self
314            .read_records::<dbn::TradeMsg>(&filepath, instrument_id, price_precision, false, None)
315            .map_err(to_pyvalue_err)?;
316
317        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
318    }
319
320    #[pyo3(name = "load_bars")]
321    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, timestamp_on_close=true))]
322    fn py_load_bars(
323        &self,
324        filepath: PathBuf,
325        instrument_id: Option<InstrumentId>,
326        price_precision: Option<u8>,
327        timestamp_on_close: bool,
328    ) -> PyResult<Vec<Bar>> {
329        self.load_bars(
330            &filepath,
331            instrument_id,
332            price_precision,
333            Some(timestamp_on_close),
334        )
335        .map_err(to_pyvalue_err)
336    }
337
338    #[pyo3(name = "load_bars_as_pycapsule")]
339    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, timestamp_on_close=true))]
340    fn py_load_bars_as_pycapsule(
341        &self,
342        py: Python,
343        filepath: PathBuf,
344        instrument_id: Option<InstrumentId>,
345        price_precision: Option<u8>,
346        timestamp_on_close: bool,
347    ) -> PyResult<PyObject> {
348        let iter = self
349            .read_records::<dbn::OhlcvMsg>(
350                &filepath,
351                instrument_id,
352                price_precision,
353                false,
354                Some(timestamp_on_close),
355            )
356            .map_err(to_pyvalue_err)?;
357
358        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
359    }
360
361    #[pyo3(name = "load_status")]
362    #[pyo3(signature = (filepath, instrument_id=None))]
363    fn py_load_status(
364        &self,
365        filepath: PathBuf,
366        instrument_id: Option<InstrumentId>,
367    ) -> PyResult<Vec<InstrumentStatus>> {
368        let iter = self
369            .load_status_records::<dbn::StatusMsg>(&filepath, instrument_id)
370            .map_err(to_pyvalue_err)?;
371
372        let mut data = Vec::new();
373        for result in iter {
374            match result {
375                Ok(item) => data.push(item),
376                Err(e) => return Err(to_pyvalue_err(e)),
377            }
378        }
379
380        Ok(data)
381    }
382
383    #[pyo3(name = "load_imbalance")]
384    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
385    fn py_load_imbalance(
386        &self,
387        filepath: PathBuf,
388        instrument_id: Option<InstrumentId>,
389        price_precision: Option<u8>,
390    ) -> PyResult<Vec<DatabentoImbalance>> {
391        let iter = self
392            .read_imbalance_records::<dbn::ImbalanceMsg>(&filepath, instrument_id, price_precision)
393            .map_err(to_pyvalue_err)?;
394
395        let mut data = Vec::new();
396        for result in iter {
397            match result {
398                Ok(item) => data.push(item),
399                Err(e) => return Err(to_pyvalue_err(e)),
400            }
401        }
402
403        Ok(data)
404    }
405
406    #[pyo3(name = "load_statistics")]
407    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
408    fn py_load_statistics(
409        &self,
410        filepath: PathBuf,
411        instrument_id: Option<InstrumentId>,
412        price_precision: Option<u8>,
413    ) -> PyResult<Vec<DatabentoStatistics>> {
414        let iter = self
415            .read_statistics_records::<dbn::StatMsg>(&filepath, instrument_id, price_precision)
416            .map_err(to_pyvalue_err)?;
417
418        let mut data = Vec::new();
419        for result in iter {
420            match result {
421                Ok(item) => data.push(item),
422                Err(e) => return Err(to_pyvalue_err(e)),
423            }
424        }
425
426        Ok(data)
427    }
428}
429
430fn exhaust_data_iter_to_pycapsule(
431    py: Python,
432    iter: impl Iterator<Item = anyhow::Result<(Option<Data>, Option<Data>)>>,
433) -> anyhow::Result<PyObject> {
434    let mut data = Vec::new();
435    for result in iter {
436        match result {
437            Ok((Some(item1), None)) => data.push(item1),
438            Ok((None, Some(item2))) => data.push(item2),
439            Ok((Some(item1), Some(item2))) => {
440                data.push(item1);
441                data.push(item2);
442            }
443            Ok((None, None)) => {
444                continue;
445            }
446            Err(e) => return Err(e),
447        }
448    }
449
450    let cvec: CVec = data.into();
451    let capsule = PyCapsule::new_with_destructor::<CVec, _>(py, cvec, None, |_, _| {})?;
452
453    // TODO: Improve error domain. Replace anyhow errors with nautilus
454    // errors to unify pyo3 and anyhow errors.
455    Ok(capsule.into_py_any_unwrap(py))
456}