Backtest: Binance OrderBook data
Tutorial for NautilusTrader, a high-performance algorithmic trading platform and event-driven backtester.
info
We are currently working on this tutorial.
Overview
This tutorial runs through how to set up the data catalog and a BacktestNode
to backtest an OrderBookImbalance
strategy on order book data. This example requires that you bring your own Binance order book data.
Prerequisites
- Python 3.11+ installed
- JupyterLab or similar installed (pip install -U jupyterlab)
- NautilusTrader latest release installed (pip install -U nautilus_trader)
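To confirm your environment before proceeding, you can check the interpreter and installed release (assuming nautilus_trader exposes __version__, as recent releases do):
import sys
import nautilus_trader
print(sys.version)                  # Should report Python 3.11 or newer
print(nautilus_trader.__version__)  # Installed NautilusTrader release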
Imports
We'll start with all of our imports for the remainder of this guide:
import os
import shutil
from decimal import Decimal
from pathlib import Path
import pandas as pd
from nautilus_trader.backtest.node import BacktestDataConfig
from nautilus_trader.backtest.node import BacktestEngineConfig
from nautilus_trader.backtest.node import BacktestNode
from nautilus_trader.backtest.node import BacktestRunConfig
from nautilus_trader.backtest.node import BacktestVenueConfig
from nautilus_trader.config import ImportableStrategyConfig
from nautilus_trader.config import LoggingConfig
from nautilus_trader.core.datetime import dt_to_unix_nanos
from nautilus_trader.model.data import OrderBookDelta
from nautilus_trader.persistence.catalog import ParquetDataCatalog
from nautilus_trader.persistence.loaders import BinanceOrderBookDeltaDataLoader
from nautilus_trader.persistence.wranglers import OrderBookDeltaDataWrangler
from nautilus_trader.test_kit.providers import TestInstrumentProvider
Loading data
# Path to your data directory, using the user's Downloads folder as an example
DATA_DIR = "~/Downloads"
data_path = Path(DATA_DIR).expanduser() / "Data" / "Binance"
raw_files = list(data_path.iterdir())
assert raw_files, f"Unable to find any Binance data files in directory {data_path}"
raw_files
# First we'll load the initial order book snapshot
path_snap = data_path / "BTCUSDT_T_DEPTH_2022-11-01_depth_snap.csv"
df_snap = BinanceOrderBookDeltaDataLoader.load(path_snap)
df_snap.head()
# Then we'll load the order book updates; to save time, we limit this to 1 million rows
path_update = data_path / "BTCUSDT_T_DEPTH_2022-11-01_depth_update.csv"
nrows = 1_000_000
df_update = BinanceOrderBookDeltaDataLoader.load(path_update, nrows=nrows)
df_update.head()
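Before wrangling, it's worth a quick sanity check that both frames loaded as expected. This is plain pandas and assumes nothing beyond what the loader returned above:
# Sanity-check the loaded frames (plain pandas)
print(df_snap.shape, df_update.shape)
df_update.dtypes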
Process deltas using a wrangler
BTCUSDT_BINANCE = TestInstrumentProvider.btcusdt_binance()
wrangler = OrderBookDeltaDataWrangler(BTCUSDT_BINANCE)
deltas = wrangler.process(df_snap)
deltas += wrangler.process(df_update)
deltas.sort(key=lambda x: x.ts_init) # Ensure data is non-decreasing by `ts_init`
deltas[:10]
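Each element is an OrderBookDelta. If you want to inspect one before writing to the catalog, the fields below are part of the public data model (a quick sketch; check your installed version if attribute names differ):
first = deltas[0]
print(first.instrument_id)  # Instrument the delta belongs to
print(first.action)         # BookAction, e.g. ADD / UPDATE / DELETE / CLEAR
print(first.order)          # BookOrder carrying side, price, and size
print(first.ts_init)        # Nanosecond timestamp the deltas were sorted by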
Set up data catalog
CATALOG_PATH = os.getcwd() + "/catalog"
# Clear if it already exists, then create fresh
if os.path.exists(CATALOG_PATH):
shutil.rmtree(CATALOG_PATH)
os.mkdir(CATALOG_PATH)
# Create a catalog instance
catalog = ParquetDataCatalog(CATALOG_PATH)
# Write the instrument and deltas to the catalog
catalog.write_data([BTCUSDT_BINANCE])
catalog.write_data(deltas)
# Confirm the instrument was written
catalog.instruments()
# Explore the available data in the catalog
start = dt_to_unix_nanos(pd.Timestamp("2022-11-01", tz="UTC"))
end = dt_to_unix_nanos(pd.Timestamp("2022-11-04", tz="UTC"))
deltas = catalog.order_book_deltas(start=start, end=end)
print(len(deltas))
deltas[:10]
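As an optional check that the deltas replay cleanly, you can feed them through a standalone OrderBook and inspect the resulting top of book. This sketch assumes the OrderBook API from recent releases (apply_delta, best_bid_price, best_ask_price) and isn't required for the backtest itself:
from nautilus_trader.model.book import OrderBook
from nautilus_trader.model.enums import BookType
# Replay every delta into a fresh L2 book (may take a moment for 1M+ rows)
book = OrderBook(BTCUSDT_BINANCE.id, BookType.L2_MBP)
for delta in deltas:
    book.apply_delta(delta)
print(book.best_bid_price(), book.best_ask_price())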
Configure backtest
instrument = catalog.instruments()[0]
book_type = "L2_MBP" # Ensure data book type matches venue book type
data_configs = [BacktestDataConfig(
catalog_path=CATALOG_PATH,
data_cls=OrderBookDelta,
instrument_id=instrument.id,
# start_time=start, # Run across all data
# end_time=end, # Run across all data
)
]
venues_configs = [
BacktestVenueConfig(
name="BINANCE",
oms_type="NETTING",
account_type="CASH",
base_currency=None,
starting_balances=["20 BTC", "100000 USDT"],
book_type=book_type, # <-- Venue's book type
)
]
strategies = [
ImportableStrategyConfig(
strategy_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalance",
config_path="nautilus_trader.examples.strategies.orderbook_imbalance:OrderBookImbalanceConfig",
config={
"instrument_id": instrument.id,
"book_type": book_type,
"max_trade_size": Decimal("1.000"),
"min_seconds_between_triggers": 1.0,
},
),
]
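For context on what will be running: the example strategy watches top-of-book volume and fires when one side dominates for long enough. A simplified sketch of that signal follows (the exact logic lives in the strategy source referenced by strategy_path above; this is an illustration, not the implementation):
# Simplified sketch of an order book imbalance signal
def imbalance_ratio(bid_size: float, ask_size: float) -> float:
    """Fraction of top-of-book volume sitting on the bid side."""
    total = bid_size + ask_size
    return bid_size / total if total > 0 else 0.5

imbalance_ratio(5.0, 1.0)  # ~0.83 -> book heavily skewed to the bid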
# NautilusTrader currently exceeds the rate limit for Jupyter notebook logging (stdout output),
# which is why the `log_level` is set to "ERROR". If you lower this level to see
# more logging, the notebook will hang during cell execution. A fix is currently
# being investigated, which involves either raising the configured rate limits for
# Jupyter or throttling the log flushing from Nautilus.
# https://github.com/jupyterlab/jupyterlab/issues/12845
# https://github.com/deshaw/jupyterlab-limit-output
config = BacktestRunConfig(
engine=BacktestEngineConfig(
strategies=strategies,
logging=LoggingConfig(log_level="ERROR"),
),
data=data_configs,
venues=venues_configs,
)
config
Run the backtest
node = BacktestNode(configs=[config])
result = node.run()
result
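node.run() returns a list of BacktestResult objects, one per run config. You can pull out a few summary fields directly (attribute names as in recent releases; the repr above shows what your version provides):
for r in result:
    print(r.run_config_id, r.total_orders, r.total_positions)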
from nautilus_trader.backtest.engine import BacktestEngine
from nautilus_trader.model.identifiers import Venue
# Fetch the underlying engine from the node to generate further reports
engine: BacktestEngine = node.get_engine(config.id)
engine.trader.generate_order_fills_report()
engine.trader.generate_positions_report()
engine.trader.generate_account_report(Venue("BINANCE"))
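Once you're finished with the reports, the engine's resources can be released:
# Release resources once finished with the engine
engine.dispose()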