nautilus_databento/python/
loader.rs

// -------------------------------------------------------------------------------------------------
//  Copyright (C) 2015-2025 Nautech Systems Pty Ltd. All rights reserved.
//  https://nautechsystems.io
//
//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
//  You may not use this file except in compliance with the License.
//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
// -------------------------------------------------------------------------------------------------

//! Python bindings for the Databento data loader.
17
18use std::{collections::HashMap, path::PathBuf};
19
20use databento::dbn;
21use nautilus_core::{
22    ffi::cvec::CVec,
23    python::{IntoPyObjectNautilusExt, to_pyvalue_err},
24};
25use nautilus_model::{
26    data::{Bar, Data, InstrumentStatus, OrderBookDelta, OrderBookDepth10, QuoteTick, TradeTick},
27    identifiers::{InstrumentId, Venue},
28    python::instruments::instrument_any_to_pyobject,
29};
30use pyo3::{
31    prelude::*,
32    types::{PyCapsule, PyList},
33};
34use ustr::Ustr;
35
36use crate::{
37    loader::DatabentoDataLoader,
38    types::{DatabentoImbalance, DatabentoPublisher, DatabentoStatistics, PublisherId},
39};
40
41#[pymethods]
42impl DatabentoDataLoader {
43    #[new]
44    #[pyo3(signature = (publishers_filepath=None))]
45    fn py_new(publishers_filepath: Option<PathBuf>) -> PyResult<Self> {
46        Self::new(publishers_filepath).map_err(to_pyvalue_err)
47    }
48
49    #[pyo3(name = "load_publishers")]
50    fn py_load_publishers(&mut self, publishers_filepath: PathBuf) -> PyResult<()> {
51        self.load_publishers(publishers_filepath)
52            .map_err(to_pyvalue_err)
53    }
54
55    #[must_use]
56    #[pyo3(name = "get_publishers")]
57    fn py_get_publishers(&self) -> HashMap<u16, DatabentoPublisher> {
58        self.get_publishers()
59            .iter()
60            .map(|(&key, value)| (key, value.clone()))
61            .collect::<HashMap<u16, DatabentoPublisher>>()
62    }
63
64    #[pyo3(name = "set_dataset_for_venue")]
65    fn py_set_dataset_for_venue(&mut self, dataset: String, venue: Venue) {
66        self.set_dataset_for_venue(Ustr::from(&dataset), venue);
67    }
68
69    #[must_use]
70    #[pyo3(name = "get_dataset_for_venue")]
71    fn py_get_dataset_for_venue(&self, venue: &Venue) -> Option<String> {
72        self.get_dataset_for_venue(venue).map(ToString::to_string)
73    }
74
75    #[must_use]
76    #[pyo3(name = "get_venue_for_publisher")]
77    fn py_get_venue_for_publisher(&self, publisher_id: PublisherId) -> Option<String> {
78        self.get_venue_for_publisher(publisher_id)
79            .map(ToString::to_string)
80    }
81
82    #[pyo3(name = "schema_for_file")]
83    fn py_schema_for_file(&self, filepath: PathBuf) -> PyResult<Option<String>> {
84        self.schema_from_file(&filepath).map_err(to_pyvalue_err)
85    }
86
87    #[pyo3(name = "load_instruments")]
88    fn py_load_instruments(
89        &mut self,
90        py: Python,
91        filepath: PathBuf,
92        use_exchange_as_venue: bool,
93    ) -> PyResult<PyObject> {
94        let iter = self
95            .load_instruments(&filepath, use_exchange_as_venue)
96            .map_err(to_pyvalue_err)?;
97
98        let mut data = Vec::new();
99        for instrument in iter {
100            let py_object = instrument_any_to_pyobject(py, instrument)?;
101            data.push(py_object);
102        }
103
104        let list = PyList::new(py, &data).expect("Invalid `ExactSizeIterator`");
105
106        Ok(list.into_py_any_unwrap(py))
107    }
108
109    // Cannot include trades
110    #[pyo3(name = "load_order_book_deltas")]
111    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
112    fn py_load_order_book_deltas(
113        &self,
114        filepath: PathBuf,
115        instrument_id: Option<InstrumentId>,
116        price_precision: Option<u8>,
117    ) -> PyResult<Vec<OrderBookDelta>> {
118        self.load_order_book_deltas(&filepath, instrument_id, price_precision)
119            .map_err(to_pyvalue_err)
120    }
121
122    #[pyo3(name = "load_order_book_deltas_as_pycapsule")]
123    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, include_trades=None))]
124    fn py_load_order_book_deltas_as_pycapsule(
125        &self,
126        py: Python,
127        filepath: PathBuf,
128        instrument_id: Option<InstrumentId>,
129        price_precision: Option<u8>,
130        include_trades: Option<bool>,
131    ) -> PyResult<PyObject> {
132        let iter = self
133            .read_records::<dbn::MboMsg>(
134                &filepath,
135                instrument_id,
136                price_precision,
137                include_trades.unwrap_or(false),
138                None,
139            )
140            .map_err(to_pyvalue_err)?;
141
142        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
143    }
144
145    #[pyo3(name = "load_order_book_depth10")]
146    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
147    fn py_load_order_book_depth10(
148        &self,
149        filepath: PathBuf,
150        instrument_id: Option<InstrumentId>,
151        price_precision: Option<u8>,
152    ) -> PyResult<Vec<OrderBookDepth10>> {
153        self.load_order_book_depth10(&filepath, instrument_id, price_precision)
154            .map_err(to_pyvalue_err)
155    }
156
157    #[pyo3(name = "load_order_book_depth10_as_pycapsule")]
158    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
159    fn py_load_order_book_depth10_as_pycapsule(
160        &self,
161        py: Python,
162        filepath: PathBuf,
163        instrument_id: Option<InstrumentId>,
164        price_precision: Option<u8>,
165    ) -> PyResult<PyObject> {
166        let iter = self
167            .read_records::<dbn::Mbp10Msg>(&filepath, instrument_id, price_precision, false, None)
168            .map_err(to_pyvalue_err)?;
169
170        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
171    }
172
173    #[pyo3(name = "load_quotes")]
174    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
175    fn py_load_quotes(
176        &self,
177        filepath: PathBuf,
178        instrument_id: Option<InstrumentId>,
179        price_precision: Option<u8>,
180    ) -> PyResult<Vec<QuoteTick>> {
181        self.load_quotes(&filepath, instrument_id, price_precision)
182            .map_err(to_pyvalue_err)
183    }
184
185    #[pyo3(name = "load_quotes_as_pycapsule")]
186    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, include_trades=None))]
187    fn py_load_quotes_as_pycapsule(
188        &self,
189        py: Python,
190        filepath: PathBuf,
191        instrument_id: Option<InstrumentId>,
192        price_precision: Option<u8>,
193        include_trades: Option<bool>,
194    ) -> PyResult<PyObject> {
195        let iter = self
196            .read_records::<dbn::Mbp1Msg>(
197                &filepath,
198                instrument_id,
199                price_precision,
200                include_trades.unwrap_or(false),
201                None,
202            )
203            .map_err(to_pyvalue_err)?;
204
205        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
206    }
207
208    #[pyo3(name = "load_bbo_quotes")]
209    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
210    fn py_load_bbo_quotes(
211        &self,
212        filepath: PathBuf,
213        instrument_id: Option<InstrumentId>,
214        price_precision: Option<u8>,
215    ) -> PyResult<Vec<QuoteTick>> {
216        self.load_bbo_quotes(&filepath, instrument_id, price_precision)
217            .map_err(to_pyvalue_err)
218    }
219
220    #[pyo3(name = "load_bbo_quotes_as_pycapsule")]
221    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
222    fn py_load_bbo_quotes_as_pycapsule(
223        &self,
224        py: Python,
225        filepath: PathBuf,
226        instrument_id: Option<InstrumentId>,
227        price_precision: Option<u8>,
228    ) -> PyResult<PyObject> {
229        let iter = self
230            .read_records::<dbn::BboMsg>(&filepath, instrument_id, price_precision, false, None)
231            .map_err(to_pyvalue_err)?;
232
233        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
234    }
235
236    #[pyo3(name = "load_tbbo_trades")]
237    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
238    fn py_load_tbbo_trades(
239        &self,
240        filepath: PathBuf,
241        instrument_id: Option<InstrumentId>,
242        price_precision: Option<u8>,
243    ) -> PyResult<Vec<TradeTick>> {
244        self.load_tbbo_trades(&filepath, instrument_id, price_precision)
245            .map_err(to_pyvalue_err)
246    }
247
248    #[pyo3(name = "load_tbbo_trades_as_pycapsule")]
249    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
250    fn py_load_tbbo_trades_as_pycapsule(
251        &self,
252        py: Python,
253        filepath: PathBuf,
254        instrument_id: Option<InstrumentId>,
255        price_precision: Option<u8>,
256    ) -> PyResult<PyObject> {
257        let iter = self
258            .read_records::<dbn::TbboMsg>(&filepath, instrument_id, price_precision, false, None)
259            .map_err(to_pyvalue_err)?;
260
261        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
262    }
263
264    #[pyo3(name = "load_trades")]
265    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
266    fn py_load_trades(
267        &self,
268        filepath: PathBuf,
269        instrument_id: Option<InstrumentId>,
270        price_precision: Option<u8>,
271    ) -> PyResult<Vec<TradeTick>> {
272        self.load_trades(&filepath, instrument_id, price_precision)
273            .map_err(to_pyvalue_err)
274    }
275
276    #[pyo3(name = "load_trades_as_pycapsule")]
277    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
278    fn py_load_trades_as_pycapsule(
279        &self,
280        py: Python,
281        filepath: PathBuf,
282        instrument_id: Option<InstrumentId>,
283        price_precision: Option<u8>,
284    ) -> PyResult<PyObject> {
285        let iter = self
286            .read_records::<dbn::TradeMsg>(&filepath, instrument_id, price_precision, false, None)
287            .map_err(to_pyvalue_err)?;
288
289        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
290    }
291
292    #[pyo3(name = "load_bars")]
293    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, timestamp_on_close=true))]
294    fn py_load_bars(
295        &self,
296        filepath: PathBuf,
297        instrument_id: Option<InstrumentId>,
298        price_precision: Option<u8>,
299        timestamp_on_close: bool,
300    ) -> PyResult<Vec<Bar>> {
301        self.load_bars(
302            &filepath,
303            instrument_id,
304            price_precision,
305            Some(timestamp_on_close),
306        )
307        .map_err(to_pyvalue_err)
308    }
309
310    #[pyo3(name = "load_bars_as_pycapsule")]
311    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None, timestamp_on_close=true))]
312    fn py_load_bars_as_pycapsule(
313        &self,
314        py: Python,
315        filepath: PathBuf,
316        instrument_id: Option<InstrumentId>,
317        price_precision: Option<u8>,
318        timestamp_on_close: bool,
319    ) -> PyResult<PyObject> {
320        let iter = self
321            .read_records::<dbn::OhlcvMsg>(
322                &filepath,
323                instrument_id,
324                price_precision,
325                false,
326                Some(timestamp_on_close),
327            )
328            .map_err(to_pyvalue_err)?;
329
330        exhaust_data_iter_to_pycapsule(py, iter).map_err(to_pyvalue_err)
331    }
332
333    #[pyo3(name = "load_status")]
334    #[pyo3(signature = (filepath, instrument_id=None))]
335    fn py_load_status(
336        &self,
337        filepath: PathBuf,
338        instrument_id: Option<InstrumentId>,
339    ) -> PyResult<Vec<InstrumentStatus>> {
340        let iter = self
341            .load_status_records::<dbn::StatusMsg>(&filepath, instrument_id)
342            .map_err(to_pyvalue_err)?;
343
344        let mut data = Vec::new();
345        for result in iter {
346            match result {
347                Ok(item) => data.push(item),
348                Err(e) => return Err(to_pyvalue_err(e)),
349            }
350        }
351
352        Ok(data)
353    }
354
355    #[pyo3(name = "load_imbalance")]
356    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
357    fn py_load_imbalance(
358        &self,
359        filepath: PathBuf,
360        instrument_id: Option<InstrumentId>,
361        price_precision: Option<u8>,
362    ) -> PyResult<Vec<DatabentoImbalance>> {
363        let iter = self
364            .read_imbalance_records::<dbn::ImbalanceMsg>(&filepath, instrument_id, price_precision)
365            .map_err(to_pyvalue_err)?;
366
367        let mut data = Vec::new();
368        for result in iter {
369            match result {
370                Ok(item) => data.push(item),
371                Err(e) => return Err(to_pyvalue_err(e)),
372            }
373        }
374
375        Ok(data)
376    }
377
378    #[pyo3(name = "load_statistics")]
379    #[pyo3(signature = (filepath, instrument_id=None, price_precision=None))]
380    fn py_load_statistics(
381        &self,
382        filepath: PathBuf,
383        instrument_id: Option<InstrumentId>,
384        price_precision: Option<u8>,
385    ) -> PyResult<Vec<DatabentoStatistics>> {
386        let iter = self
387            .read_statistics_records::<dbn::StatMsg>(&filepath, instrument_id, price_precision)
388            .map_err(to_pyvalue_err)?;
389
390        let mut data = Vec::new();
391        for result in iter {
392            match result {
393                Ok(item) => data.push(item),
394                Err(e) => return Err(to_pyvalue_err(e)),
395            }
396        }
397
398        Ok(data)
399    }
400}
401
402fn exhaust_data_iter_to_pycapsule(
403    py: Python,
404    iter: impl Iterator<Item = anyhow::Result<(Option<Data>, Option<Data>)>>,
405) -> anyhow::Result<PyObject> {
406    let mut data = Vec::new();
407    for result in iter {
408        match result {
409            Ok((Some(item1), None)) => data.push(item1),
410            Ok((None, Some(item2))) => data.push(item2),
411            Ok((Some(item1), Some(item2))) => {
412                data.push(item1);
413                data.push(item2);
414            }
415            Ok((None, None)) => {
416                continue;
417            }
418            Err(e) => return Err(e),
419        }
420    }
421
422    let cvec: CVec = data.into();
423    let capsule = PyCapsule::new::<CVec>(py, cvec, None)?;
424
425    // TODO: Improve error domain. Replace anyhow errors with nautilus
426    // errors to unify pyo3 and anyhow errors.
427    Ok(capsule.into_py_any_unwrap(py))
428}