Custom Market Objects
FinLab supports Taiwan stocks (TWMarket), US stocks (USMarket), and Emerging Market (ROTCMarket) by default. If you want to backtest cryptocurrencies, futures, international stocks, or use custom price data, you can do so by inheriting the Market class.
Why Custom Markets?
Different markets have different characteristics:
- Different trading hours: US and Taiwan markets have different open/close times
- Different price fields: Cryptocurrencies may not have a traditional "open price" concept
- Different data sources: Load prices from CSV, API, or database
- Different trading rules: Futures have leverage; cryptocurrencies have no price limits
- Different benchmarks: Taiwan stocks benchmark against TAIEX; US stocks against S&P 500
Quick Start: Using Built-in Markets
FinLab provides 3 built-in markets:
from finlab import data, backtest
from finlab.markets.tw import TWMarket
from finlab.markets.us import USMarket
from finlab.markets.rotc import ROTCMarket
# Taiwan market (default)
close = data.get('price:收盤價')
position = close > close.average(20)
report = backtest.sim(position, resample='M')
# Equivalent to:
report = backtest.sim(position, resample='M', market=TWMarket())
# US market
us_close = data.get('us_price:adj_close')
us_position = us_close > us_close.average(50)
report = backtest.sim(us_position, resample='W', market=USMarket())
# Emerging market (ROTC)
rotc_close = data.get('rotc_price:收盤價')
rotc_position = rotc_close > 10
report = backtest.sim(rotc_position, resample='M', market=ROTCMarket())
Built-in Market Comparison
| Feature | TWMarket | USMarket | ROTCMarket |
|---|---|---|---|
| Market name | 'tw_stock' | 'us_stock' | 'rotc_stock' |
| Data frequency | Daily ('1d') | Daily ('1d') | Daily ('1d') |
| Benchmark index | TAIEX | S&P 500 | (None) |
| Timezone | Asia/Taipei | US/Eastern | Asia/Taipei |
| Market close | 15:00 | 16:00 | 14:00 |
| Price limits | 10% | None | None |
| Special stock types | Disposition/Full-delivery | None | None |
Custom Market Class
Basic Example: Cryptocurrency Market
Suppose you have daily close prices for Bitcoin (BTC) and Ethereum (ETH) in CSV files:
from finlab.market import Market
import pandas as pd
class CryptoMarket(Market):
    """Daily cryptocurrency market that reads one CSV file per price field."""

    @staticmethod
    def get_name():
        """Return market name"""
        return 'crypto'

    @staticmethod
    def get_freq():
        """Return data frequency"""
        return '1d'  # Daily

    def get_price(self, trade_at_price='close', adj=True):
        """Get price data

        Args:
            trade_at_price: One of 'open', 'close', 'high', 'low'
            adj: Whether to use adjusted prices (usually not needed for crypto)

        Returns:
            pd.DataFrame: index is dates, columns are coin symbols
        """
        # Load prices from CSV; the file name is derived from the price field,
        # e.g. 'close' -> crypto_close.csv
        df = pd.read_csv(f'crypto_{trade_at_price}.csv', index_col=0, parse_dates=True)
        return df

    @staticmethod
    def get_benchmark():
        """Benchmark index (e.g., use BTC as benchmark)"""
        df = pd.read_csv('crypto_close.csv', index_col=0, parse_dates=True)
        return df['BTC']
# Usage
from finlab.backtest import sim
# Assuming CSV format:
# date,BTC,ETH,BNB
# 2020-01-01,7200,130,15
# 2020-01-02,7350,135,16
close = pd.read_csv('crypto_close.csv', index_col=0, parse_dates=True)
position = close > close.rolling(20).mean()
report = sim(position, market=CryptoMarket(), resample='W')
report.display()
Advanced Example: Futures Market (with Leverage)
Futures have leverage, which can be simulated by modifying prices:
class FuturesMarket(Market):
    """Single-contract futures market that simulates leverage by rescaling prices.

    The CSV 'futures_price.csv' is read as a table indexed by date whose
    columns are price fields ('open', 'close', ...), as implied by the
    ``self.prices[trade_at_price]`` lookup below.
    """

    def __init__(self, leverage=10, symbol='FUT'):
        """
        Args:
            leverage: Leverage multiplier
            symbol: Column name given to the contract in the price table
                returned by get_price()
        """
        self.leverage = leverage
        self.symbol = symbol
        self.prices = pd.read_csv('futures_price.csv', index_col=0, parse_dates=True)

    @staticmethod
    def get_name():
        return 'futures'

    @staticmethod
    def get_freq():
        return '1d'

    def get_price(self, trade_at_price='close', adj=True):
        """Return a synthetic price table whose daily returns are leveraged.

        Returns:
            pd.DataFrame: index is dates, single column named ``self.symbol``
        """
        price = self.prices[trade_at_price]
        # Calculate daily returns multiplied by leverage
        daily_return = price.pct_change() * self.leverage
        # Convert back to price (cumulative from 100); the first row has no
        # return, so its growth factor is filled with 1
        leveraged_price = (1 + daily_return).fillna(1).cumprod() * 100
        # Selecting one column yields a Series; wrap it so the result is a
        # dates x symbols DataFrame like every other market's get_price()
        return leveraged_price.to_frame(name=self.symbol)

    @staticmethod
    def get_benchmark():
        """Return the unleveraged (1x) close price series as benchmark."""
        df = pd.read_csv('futures_price.csv', index_col=0, parse_dates=True)
        # df['close'] is already a 1-D Series; 2-D indexing on it would raise
        return df['close']
# Backtest with 10x leverage
report = sim(position, market=FuturesMarket(leverage=10))
Market Class Complete API
When inheriting the Market class, you can override the following methods:
Required Methods
get_price(trade_at_price, adj=True)
Returns price data.
def get_price(self, trade_at_price='close', adj=True) -> pd.DataFrame:
    """
    Args:
        trade_at_price (str): One of 'open', 'close', 'high', 'low', 'volume'
        adj (bool): Whether to return adjusted prices (accounting for splits, dividends, etc.)

    Returns:
        pd.DataFrame:
            - index: Dates (DatetimeIndex)
            - columns: Stock/asset symbols
            - values: Prices

    Raises:
        NotImplementedError: always, until a subclass overrides this method.

    Examples:
        Return format example:

        | date       |    BTC |    ETH |    BNB |
        |:-----------|-------:|-------:|-------:|
        | 2020-01-01 |   7200 |    130 |     15 |
        | 2020-01-02 |   7350 |    135 |     16 |
        | 2020-01-03 |   7100 |    128 |   14.5 |
    """
    # Must implement
    raise NotImplementedError()
Optional Static Methods
get_name()
Returns the market name, used for identifying the market type.
get_freq()
Returns data frequency, affecting return calculations.
@staticmethod
def get_freq() -> str:
    """Return the data frequency string for this market."""
    return '1d'  # Daily
    # Other common: '1h' (hourly), '4h', '1w' (weekly)
get_benchmark()
Returns the benchmark index for performance comparison.
@staticmethod
def get_benchmark() -> pd.Series:
    """
    Returns:
        pd.Series: Benchmark index price series
            - index: Dates
            - values: Index prices
    """
    # Default is empty (no benchmark displayed). The dtype is given explicitly
    # because an untyped empty Series triggers a warning on older pandas.
    return pd.Series(dtype=float)
get_asset_id_to_name()
Returns a mapping from asset ID to name, used in report display.
@staticmethod
def get_asset_id_to_name() -> dict:
    """
    Returns:
        dict: asset_id -> asset_name mapping

    Examples:
        {'BTC': 'Bitcoin', 'ETH': 'Ethereum'}
    """
    # Default: no display names
    return {}
get_market_value()
Returns market capitalization data for market-cap-weighted backtesting.
@staticmethod
def get_market_value() -> pd.DataFrame:
    """
    Returns:
        pd.DataFrame: Market cap data
            - index: Dates
            - columns: Stock symbols
            - values: Market cap values
    """
    # Default: empty table (no market-cap data available)
    return pd.DataFrame()
get_industry()
Returns industry classification for industry analysis.
@staticmethod
def get_industry() -> dict:
    """
    Returns:
        dict: asset_id -> industry mapping

    Examples:
        {'BTC': 'Cryptocurrency', 'ETH': 'Cryptocurrency'}
    """
    # Default: no industry classification
    return {}
market_close_at_timestamp(timestamp)
Returns the market close time for a given timestamp, used in live trading.
def market_close_at_timestamp(self, timestamp=None) -> pd.Timestamp:
    """
    Args:
        timestamp: Query time point (defaults to latest)

    Returns:
        pd.Timestamp: 23:59:00 on the calendar date of ``timestamp``
    """
    # Crypto trades 24 hours; use 23:59 as close
    if timestamp is None:
        timestamp = pd.Timestamp.now()
    # Rebuilding from .date() drops the time of day (and any timezone)
    return pd.Timestamp(timestamp.date()) + pd.Timedelta('23:59:00')
Using Custom Data Sources
1. Loading from CSV
The simplest approach, suitable for one-off data:
class CSVMarket(Market):
    """Market whose prices come from a single CSV file."""

    def __init__(self, csv_path):
        """
        Args:
            csv_path: Path of the CSV file (first column dates, other columns symbols)
        """
        self.csv_path = csv_path

    @staticmethod
    def get_name():
        return 'csv_market'

    def get_price(self, trade_at_price='close', adj=True):
        """Return the CSV contents; the same file is used for every price field."""
        df = pd.read_csv(self.csv_path, index_col=0, parse_dates=True)
        return df
# Usage
report = sim(position, market=CSVMarket('my_prices.csv'))
2. Dynamic Loading from API
Suitable for data that needs real-time updates:
import requests
class APIMarket(Market):
    """Market that downloads prices from an HTTP JSON API on every call."""

    def __init__(self, api_url):
        """
        Args:
            api_url: Base URL; the price field name is appended as a path segment
        """
        self.api_url = api_url

    @staticmethod
    def get_name():
        return 'api_market'

    def get_price(self, trade_at_price='close', adj=True):
        """Fetch ``{api_url}/{trade_at_price}`` and return it as a date-indexed table.

        Raises:
            requests.exceptions.RequestException: on timeout, connection
                failure, or a non-2xx HTTP status.
        """
        # Get JSON data from API. Without a timeout requests waits forever,
        # and without raise_for_status() an error page would be parsed as data.
        response = requests.get(f'{self.api_url}/{trade_at_price}', timeout=30)
        response.raise_for_status()
        payload = response.json()

        # Convert to DataFrame
        df = pd.DataFrame(payload)
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date')
        return df
# Usage
report = sim(position, market=APIMarket('https://api.example.com/prices'))
3. Hybrid Loading from FinLab Database
Combine FinLab data with custom data:
from finlab import data
class HybridMarket(Market):
    """Market that merges FinLab Taiwan stock prices with custom crypto prices."""

    @staticmethod
    def get_name():
        return 'hybrid'

    def get_price(self, trade_at_price='close', adj=True):
        """Return Taiwan stock and crypto close prices side by side.

        NOTE(review): close prices are returned whatever ``trade_at_price``
        and ``adj`` are; confirm that is intended.
        """
        # Get Taiwan stock prices from FinLab
        tw_close = data.get('price:收盤價')

        # Load custom cryptocurrency prices
        crypto_close = pd.read_csv('crypto_close.csv', index_col=0, parse_dates=True)

        # Merge both DataFrames (outer join)
        combined = pd.concat([tw_close, crypto_close], axis=1)
        return combined
# Usage (backtest Taiwan stocks + crypto together)
report = sim(position, market=HybridMarket())
Advanced Features
1. Custom Trading Price Calculation
Override get_trading_price() to customize execution prices:
class CustomPriceMarket(Market):
    """Market that adds a custom 'vwap' execution price on top of the defaults."""

    def get_price(self, trade_at_price='close', adj=True):
        """Return basic prices for the requested field (left to the reader)."""
        # ... return basic prices
        # A comment alone is not a valid function body, so fail loudly until
        # real price loading is filled in.
        raise NotImplementedError("Load and return the basic price table here")

    def get_trading_price(self, name, adj=True):
        """Custom execution price calculation"""
        if name == 'vwap':
            # Daily bars carry no intraday volume profile, so this is the
            # simple average of open/high/low/close used as a VWAP stand-in,
            # not a true volume-weighted price.
            open_price = self.get_price('open', adj=adj)
            close_price = self.get_price('close', adj=adj)
            high_price = self.get_price('high', adj=adj)
            low_price = self.get_price('low', adj=adj)
            return (open_price + close_price + high_price + low_price) / 4

        # Use default logic for other cases
        return super().get_trading_price(name, adj=adj)
# Use VWAP as execution price
report = sim(position, market=CustomPriceMarket(), trade_at='vwap')
2. Multi-timezone Support
Handle cross-timezone markets:
import datetime
class MultiTimezoneMarket(Market):
    """Global 24-hour market whose daily close is defined in UTC."""

    @staticmethod
    def get_name():
        return 'multi_tz'

    def market_close_at_timestamp(self, timestamp=None):
        """Different markets have different close times

        Args:
            timestamp: Query time point; defaults to the current UTC time.
                A timezone-naive value is interpreted as UTC.

        Returns:
            pd.Timestamp: 23:59:59 UTC on the UTC calendar date of ``timestamp``
        """
        if timestamp is None:
            # Ask for an aware "now"; a naive one cannot be tz_convert-ed
            timestamp = pd.Timestamp.now(tz='UTC')
        else:
            timestamp = pd.Timestamp(timestamp)

        # For a global 24-hour market, use UTC
        if timestamp.tzinfo is None:
            # tz_convert raises TypeError on naive timestamps; localize instead
            timestamp_utc = timestamp.tz_localize('UTC')
        else:
            timestamp_utc = timestamp.tz_convert('UTC')

        market_close = pd.Timestamp(timestamp_utc.date()) + pd.Timedelta('23:59:59')
        return market_close.tz_localize('UTC')
3. Implementing Market Holidays
Exclude weekends and holidays:
class WithHolidaysMarket(Market):
    """Market that removes weekends and a fixed list of holidays from its prices."""

    def __init__(self):
        # Define holidays (e.g., Taiwan national holidays)
        self.holidays = ['2024-01-01', '2024-02-08', '2024-02-09']
        # Convert to Timestamps so they compare against the DatetimeIndex
        self.holidays = [pd.Timestamp(d) for d in self.holidays]

    def get_price(self, trade_at_price='close', adj=True):
        """Return prices.csv restricted to non-holiday weekdays."""
        df = pd.read_csv('prices.csv', index_col=0, parse_dates=True)

        # Remove weekends (dayofweek: Monday=0 ... Sunday=6)
        df = df[df.index.dayofweek < 5]

        # Remove holidays
        df = df[~df.index.isin(self.holidays)]
        return df
Testing Custom Markets
After building a market object, always test it:
# 1. Check price data format
market = CryptoMarket()
close = market.get_price('close')
print(close.head())
print(close.index)  # Should be DatetimeIndex
print(close.shape)  # (num_dates, num_stocks)

# 2. Check benchmark index
benchmark = market.get_benchmark()
print(benchmark.head())

# 3. Run a simple backtest
# close comes from pd.read_csv, i.e. a plain pandas DataFrame, which has no
# .average() helper; use the pandas rolling mean instead.
position = close > close.rolling(20).mean()
report = sim(position, market=market, resample='M', upload=False)
report.display()

# 4. Check report's market name
print(report.market.get_name())  # Should be 'crypto'
FAQ
Q1: Why is the adj=True parameter needed?
adj=True means using adjusted prices (accounting for stock splits, dividends, etc.):
- Taiwan stocks: Historical prices are adjusted after ex-rights/ex-dividends to ensure correct return calculations
- US stocks: Also require price adjustment
- Cryptocurrency: No ex-rights/dividends; the adj parameter can be ignored
# Taiwan stock example
tw_market = TWMarket()
adj_close = tw_market.get_price('close', adj=True) # Adjusted price
raw_close = tw_market.get_price('close', adj=False) # Raw closing price
Q2: How to handle missing values (NaN)?
FinLab automatically forward-fills missing values, but it is recommended to handle them in get_price():
def get_price(self, trade_at_price='close', adj=True):
    """Load prices.csv and fill gaps; choose ONE of the strategies below.

    The three methods are alternatives. Running them one after another would
    drop every column that still has a leading gap and then apply the
    discouraged zero-fill, so only Method 1 is active here.
    """
    df = pd.read_csv('prices.csv', index_col=0, parse_dates=True)

    # Method 1: Forward fill
    df = df.ffill()

    # Method 2: Remove stocks with missing values
    # df = df.dropna(axis=1, how='any')

    # Method 3: Fill with 0 (not recommended)
    # df = df.fillna(0)

    return df
Q3: Can I use finlab.data.get() inside get_price()?
Yes! This is the standard approach for mixing FinLab data:
from finlab import data
class MixedMarket(Market):
    """Market built from a fixed subset of FinLab Taiwan stock prices."""

    def get_price(self, trade_at_price='close', adj=True):
        """Return close prices for three selected Taiwan tickers."""
        # Use FinLab Taiwan stock data
        tw_close = data.get('price:收盤價')

        # Filter specific stocks
        tw_close = tw_close[['2330', '2317', '2454']]
        return tw_close
Q4: How to simulate transaction costs?
Transaction costs are set in the sim() function, not in the Market object:
# Crypto typically has lower fees and no tax
report = sim(
position,
market=CryptoMarket(),
fee_ratio=0.001, # 0.1% commission
tax_ratio=0 # No transaction tax
)
# Taiwan stock defaults: fee_ratio=0.001425, tax_ratio=0.003
Q5: Why is get_benchmark() a static method?
Because the benchmark index is usually fixed and does not depend on instance variables. But if you need a dynamic benchmark:
class DynamicBenchmarkMarket(Market):
    """Market whose benchmark column is chosen per instance."""

    def __init__(self, benchmark_ticker):
        """
        Args:
            benchmark_ticker: Column of benchmarks.csv to use as benchmark
        """
        self.benchmark_ticker = benchmark_ticker

    def get_benchmark(self):  # Changed to instance method (removed @staticmethod)
        """Return the benchmark series selected at construction time."""
        df = pd.read_csv('benchmarks.csv', index_col=0, parse_dates=True)
        return df[self.benchmark_ticker]
# Usage
report = sim(position, market=DynamicBenchmarkMarket('BTC'))
Practical Examples
Example 1: Gold ETF Backtest
class GoldMarket(Market):
    """Gold ETF (GLD) market loaded from Yahoo Finance, benchmarked to ^GSPC."""

    @staticmethod
    def get_name():
        return 'gold'

    @staticmethod
    def get_freq():
        return '1d'

    def get_price(self, trade_at_price='close', adj=True):
        """Download GLD history and return the requested price field.

        NOTE(review): the returned column is labelled by the price field
        (e.g. 'Close'), not by a ticker; confirm it matches the columns of
        the position passed to sim().
        """
        # Load Gold ETF (GLD) prices from Yahoo Finance
        import yfinance as yf
        gold = yf.download('GLD', start='2010-01-01')
        # yfinance capitalizes field names: 'close' -> 'Close'
        return gold[[trade_at_price.capitalize()]]

    @staticmethod
    def get_benchmark():
        """Return the ^GSPC close series."""
        import yfinance as yf
        sp500 = yf.download('^GSPC', start='2010-01-01')
        return sp500['Close'].squeeze()
# Usage
gold_close = pd.read_csv('gold_close.csv', index_col=0, parse_dates=True)
# pd.read_csv returns a plain pandas DataFrame, which has no .average()
# helper; use the pandas rolling mean instead.
position = gold_close > gold_close.rolling(50).mean()
report = sim(position, market=GoldMarket(), resample='W')
Example 2: Taiwan + US Mixed Backtest
class GlobalMarket(Market):
    """Mixed Taiwan + US market with a 60/40 blended benchmark."""

    @staticmethod
    def get_name():
        return 'global'

    def get_price(self, trade_at_price='close', adj=True):
        """Return close prices of the selected Taiwan and US tickers side by side."""
        # Taiwan stocks
        tw_close = data.get('price:收盤價')[['2330', '2317']]

        # US stocks
        us_close = data.get('us_price:adj_close')[['AAPL', 'TSLA']]

        # Merge (outer join, auto-aligns dates)
        combined = pd.concat([tw_close, us_close], axis=1)
        return combined

    @staticmethod
    def get_benchmark():
        """Return 0.6 * Taiwan index + 0.4 * ^GSPC, summed row by row.

        NOTE(review): this blends raw index levels rather than returns, and
        sum(axis=1) counts a missing value as 0 on dates only one market has;
        confirm that is the intended benchmark.
        """
        # Use MSCI World Index or 60/40 TW/US combination
        tw_benchmark = data.get('benchmark_return:發行量加權股價報酬指數').squeeze()
        us_benchmark = data.get('world_index:adj_close')['^GSPC']
        combined = pd.concat([tw_benchmark * 0.6, us_benchmark * 0.4], axis=1).sum(axis=1)
        return combined
# Usage
position = ... # Position including TW + US stocks
report = sim(position, market=GlobalMarket())
Example 3: Cryptocurrency 4-Hour Candle Backtest
class Crypto4HMarket(Market):
    """Cryptocurrency market on 4-hour candles."""

    @staticmethod
    def get_name():
        return 'crypto_4h'

    @staticmethod
    def get_freq():
        return '4h'  # 4-hour frequency

    def get_price(self, trade_at_price='close', adj=True):
        """Return the 4-hour close table stored in binance_4h_close.csv."""
        # Load 4-hour candle data from Binance API
        df = pd.read_csv('binance_4h_close.csv', index_col=0, parse_dates=True)
        return df
# Usage
close = pd.read_csv('binance_4h_close.csv', index_col=0, parse_dates=True)
position = close > close.rolling(50).mean()
# Note: resample parameter should match the frequency
report = sim(position, market=Crypto4HMarket(), resample='1d') # Daily rebalancing
Common Errors and Solutions
Error 1: File Read Failure
Symptom: Custom market raises FileNotFoundError when executed.
class CryptoMarket(Market):
    """Minimal market used to reproduce the file-read failure."""

    def get_price(self, trade_at_price='close', adj=True):
        # Relative path: resolved against the current working directory
        df = pd.read_csv('crypto_close.csv', index_col=0, parse_dates=True)
        return df


market = CryptoMarket()
close = market.get_price('close')
# FileNotFoundError: [Errno 2] No such file or directory: 'crypto_close.csv'
Causes:
- CSV file does not exist or the path is wrong
- Using a relative path while the working directory is incorrect
- Filename is misspelled or the case does not match
Solution:
from finlab.market import Market
import pandas as pd
import os
class CryptoMarket(Market):
    """Cryptocurrency market that reads CSV files with explicit error reporting."""

    def __init__(self, data_dir='./data'):
        """
        Args:
            data_dir: Data directory path (default: ./data)
        """
        self.data_dir = data_dir

    @staticmethod
    def get_name():
        return 'crypto'

    @staticmethod
    def get_freq():
        return '1d'

    def get_price(self, trade_at_price='close', adj=True):
        """Fetch price data (with error handling).

        Raises:
            FileNotFoundError: the CSV for this price field does not exist.
            PermissionError: the file exists but cannot be read.
            ValueError: the file or the parsed table is empty.
            TypeError: the index is not a DatetimeIndex.
            RuntimeError: any other read failure.
        """
        # Build the full path
        file_path = os.path.join(self.data_dir, f'crypto_{trade_at_price}.csv')

        # Check whether the file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(
                f"Data file not found: {file_path}\n"
                f" Please verify:\n"
                f" 1. The file exists in the {self.data_dir} directory\n"
                f" 2. The filename is correct (crypto_{trade_at_price}.csv)\n"
                f" 3. The working directory is correct (current: {os.getcwd()})\n"
                f" Tip: sample data can be downloaded from https://example.com/data"
            )

        # Try to read the file; "from e" keeps the original traceback attached
        try:
            df = pd.read_csv(file_path, index_col=0, parse_dates=True)
        except PermissionError as e:
            raise PermissionError(
                f"No permission to read file: {file_path}\n"
                f" Please check the file permission settings"
            ) from e
        except pd.errors.EmptyDataError as e:
            raise ValueError(
                f"Data file is empty: {file_path}\n"
                f" Please verify the file contents"
            ) from e
        except Exception as e:
            raise RuntimeError(
                f"Failed to read file: {file_path}\n"
                f" Error message: {e}"
            ) from e

        # Validate data integrity (next step, see Error 2)
        df = self._validate_dataframe(df, file_path)
        return df

    def _validate_dataframe(self, df, file_path):
        """Validate DataFrame format and print a short summary."""
        # Check whether it is empty
        if df.empty:
            raise ValueError(
                f"Data is empty: {file_path}\n"
                f" Please verify the file contents"
            )

        # Check whether the index is a DatetimeIndex
        if not isinstance(df.index, pd.DatetimeIndex):
            raise TypeError(
                f"Index must be a DatetimeIndex (date format)\n"
                f" Current index type: {type(df.index)}\n"
                f" Please ensure the first CSV column is a date (e.g. 2020-01-01)\n"
                f" and use the parse_dates=True argument"
            )

        # Check whether there is any numeric data
        if df.shape[1] == 0:
            raise ValueError(
                f"No stock columns found: {file_path}\n"
                f" Please ensure the CSV format is correct (first column dates, other columns ticker symbols)"
            )

        print(f"Data loaded successfully: {file_path}")
        print(f" Date range: {df.index[0]} ~ {df.index[-1]}")
        print(f" Number of stocks: {df.shape[1]}")
        print(f" Number of trading days: {df.shape[0]}")
        return df

    @staticmethod
    def get_benchmark():
        """Benchmark index (with error handling).

        NOTE(review): this reads 'crypto_close.csv' from the working
        directory, not from the instance's data_dir; confirm that is intended.
        """
        try:
            df = pd.read_csv('crypto_close.csv', index_col=0, parse_dates=True)
            if 'BTC' not in df.columns:
                print("Warning: BTC column not found, benchmark index cannot be set")
                return pd.Series(dtype=float)  # Return an empty Series
            return df['BTC']
        except Exception as e:
            print(f"Warning: failed to load benchmark index: {e}")
            # Return an empty Series so the main workflow is unaffected
            return pd.Series(dtype=float)
# Usage: specify the data directory
try:
    market = CryptoMarket(data_dir='/path/to/crypto/data')
    close = market.get_price('close')
    print("Market object created successfully")
except FileNotFoundError as e:
    # The message already lists what to check
    print(f"\n{e}")
    print("\nPlease download sample data or fix the file path")
except Exception as e:
    print(f"\nFailed to create market object: {e}")
Error 2: Invalid Data Format
Symptom: DataFrame format does not meet expectations, causing the backtest to fail.
close = market.get_price('close')
report = sim(position, market=market)
# TypeError: Index must be DatetimeIndex
# or
# KeyError: 'BTC'
Common format issues:
- Index is not a DatetimeIndex
- Required columns are missing
- Incorrect data types (strings instead of numbers)
- Too many missing values
Solution: strengthen validation inside get_price()
class RobustMarket(Market):
    """Market that cleans and validates its CSV data before returning it."""

    def get_price(self, trade_at_price='close', adj=True):
        """Fetch price data (with full validation).

        Raises:
            ValueError: the file contains no rows or no columns.
            TypeError: the index cannot be converted to dates.
        """
        df = pd.read_csv(f'data_{trade_at_price}.csv', index_col=0)

        # Validation 0: nothing below (date range, missing ratio) is
        # meaningful for an empty table
        if df.empty:
            raise ValueError(
                f"Data is empty: data_{trade_at_price}.csv\n"
                f" Please verify the file contents"
            )

        # Validation 1: convert index to DatetimeIndex
        if not isinstance(df.index, pd.DatetimeIndex):
            try:
                df.index = pd.to_datetime(df.index)
                print("Index converted to DatetimeIndex")
            except Exception as e:
                raise TypeError(
                    f"Cannot convert index to DatetimeIndex\n"
                    f" Index sample: {df.index[:3].tolist()}\n"
                    f" Error message: {e}\n"
                    f" Please ensure the first column is a date (e.g. 2020-01-01 or 2020/01/01)"
                ) from e

        # Validation 2: check data types
        non_numeric_cols = df.select_dtypes(exclude=['number']).columns.tolist()
        if non_numeric_cols:
            print(f"Warning: the following columns are non-numeric and will be converted: {non_numeric_cols}")
            for col in non_numeric_cols:
                try:
                    # errors='coerce' turns unparseable cells into NaN
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                except Exception as e:
                    raise TypeError(
                        f"Cannot convert column {col} to numeric\n"
                        f" Please ensure the column contains numbers"
                    ) from e

        # Validation 3: check missing-value ratio
        missing_ratio = df.isna().sum() / len(df)
        high_missing_cols = missing_ratio[missing_ratio > 0.3].index.tolist()
        if high_missing_cols:
            print(f"Warning: the following columns have > 30% missing values:")
            for col in high_missing_cols:
                print(f" {col}: {missing_ratio[col]:.1%} missing")
            print(" Suggestions:")
            print(" 1. Drop these columns (df.drop(columns=high_missing_cols))")
            print(" 2. Forward-fill the gaps (df.ffill())")
            # Auto-handle: forward fill
            df = df.ffill()
            print(" Missing values have been forward-filled automatically")

        # Validation 4: check data size
        if len(df) < 100:
            print(f"Warning: too few data points ({len(df)} rows)")
            print(" At least 252 trading days (about 1 year) is recommended")
        if df.shape[1] < 2:
            print(f"Warning: too few stocks ({df.shape[1]})")
            print(" At least 10 stocks are recommended for diversification")

        # Validation 5: sort the index
        if not df.index.is_monotonic_increasing:
            print("Warning: dates are unsorted, sorting automatically")
            df = df.sort_index()

        # Validation 6: remove duplicate dates
        if df.index.duplicated().any():
            dup_dates = df.index[df.index.duplicated()].tolist()
            print(f"Warning: duplicate dates found: {dup_dates[:5]}...")
            df = df[~df.index.duplicated(keep='last')]
            print(" Keeping the last record only")

        missing_total = df.isna().sum().sum()
        print(f"\nData validation complete")
        print(f" Date range: {df.index[0].date()} ~ {df.index[-1].date()}")
        print(f" Trading days: {len(df)}")
        print(f" Number of stocks: {df.shape[1]}")
        print(f" Missing values: {missing_total} ({missing_total / df.size:.2%})")
        return df
# Usage
try:
    market = RobustMarket()
    close = market.get_price('close')

    # Run backtest
    position = close > close.rolling(20).mean()
    report = sim(position, market=market, resample='M')
    print("\nBacktest succeeded")
except (TypeError, ValueError) as e:
    # Raised by the validation steps inside get_price()
    print(f"\n{e}")
    print("\nPlease fix the data format and retry")
Error 3: API Request Failure
Symptom: Loading data via an API raises a network error.
class APIMarket(Market):
    """Minimal market used to reproduce the API request failure."""

    def get_price(self, trade_at_price='close', adj=True):
        # No timeout, no status check, no retry: any network problem surfaces
        # as a raw requests exception
        response = requests.get(f'https://api.example.com/{trade_at_price}')
        return pd.DataFrame(response.json())


market = APIMarket()
close = market.get_price('close')
# requests.exceptions.ConnectionError: Failed to establish a new connection
Causes:
- Network connectivity issues
- API key is invalid or expired
- API rate limit exceeded
- API returned an unexpected format
Solution: add retry logic and error handling
import requests
import time
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
class APIMarket(Market):
    """Market that downloads prices from an HTTP API with retries and clear errors."""

    def __init__(self, api_url, api_key=None, max_retries=3):
        """
        Args:
            api_url: API base URL
            api_key: API key (optional)
            max_retries: Maximum number of retries
        """
        self.api_url = api_url
        self.api_key = api_key
        self.max_retries = max_retries
        # Build a Session with retry logic
        self.session = self._create_session()

    def _create_session(self):
        """Create a requests Session with retry logic."""
        session = requests.Session()

        # Configure retry strategy
        retry_strategy = Retry(
            total=self.max_retries,
            backoff_factor=1,  # Exponential backoff between attempts
            status_forcelist=[429, 500, 502, 503, 504],  # HTTP status codes to retry on
            allowed_methods=["HEAD", "GET", "OPTIONS"],
            # Hand the last response back once retries are exhausted, so the
            # status-specific messages in get_price() are actually reached
            raise_on_status=False,
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    @staticmethod
    def get_name():
        return 'api_market'

    def get_price(self, trade_at_price='close', adj=True):
        """Fetch price data from an API (with error handling).

        Raises:
            TimeoutError: the request took longer than 30 seconds.
            ConnectionError: the server could not be reached.
            PermissionError: the API rejected the credentials (HTTP 401).
            RuntimeError: rate limit, server error, or any other request failure.
            ValueError: the body is not JSON or cannot be turned into a table.
        """
        url = f'{self.api_url}/prices/{trade_at_price}'

        # Set request headers
        headers = {}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        print(f"Downloading data from API: {url}")

        # Only the network call is inside this try, so the status-specific
        # errors raised further down are not re-wrapped by a catch-all handler
        try:
            response = self.session.get(
                url,
                headers=headers,
                timeout=30  # 30-second timeout
            )
        except requests.exceptions.Timeout as e:
            raise TimeoutError(
                f"API request timed out (> 30 seconds)\n"
                f" Network is too slow or the API server is overloaded\n"
                f" Suggestions:\n"
                f" 1. Check network connectivity\n"
                f" 2. Reduce the data range\n"
                f" 3. Retry later"
            ) from e
        except requests.exceptions.ConnectionError as e:
            raise ConnectionError(
                f"Network connection failed\n"
                f" Unable to connect to {self.api_url}\n"
                f" Please check:\n"
                f" 1. Network connectivity\n"
                f" 2. API URL is correct\n"
                f" 3. Firewall is not blocking the connection"
            ) from e
        except requests.exceptions.RequestException as e:
            raise RuntimeError(
                f"API request failed: {e}\n"
                f" Please inspect the detailed error message"
            ) from e

        # Check the HTTP status code
        if response.status_code == 401:
            raise PermissionError(
                f"API authentication failed (HTTP 401)\n"
                f" Please verify the API key is correct and not expired\n"
                f" Re-issue a key from the API provider if needed"
            )
        if response.status_code == 429:
            raise RuntimeError(
                f"API rate limit exceeded (HTTP 429)\n"
                f" Please retry later or upgrade your API plan"
            )
        if response.status_code >= 500:
            raise RuntimeError(
                f"API server error (HTTP {response.status_code})\n"
                f" The server is temporarily unavailable, please retry later"
            )
        try:
            response.raise_for_status()  # Raise any other HTTP errors
        except requests.exceptions.HTTPError as e:
            raise RuntimeError(
                f"API request failed: {e}\n"
                f" Please inspect the detailed error message"
            ) from e

        # Parse JSON (requests' JSONDecodeError is a ValueError subclass)
        try:
            data = response.json()
        except ValueError as e:
            raise ValueError(
                f"API returned an invalid format\n"
                f" Expected JSON, but received: {response.text[:200]}...\n"
                f" Please contact the API provider to confirm the documentation"
            ) from e

        # Convert to DataFrame
        try:
            df = pd.DataFrame(data)

            # Handle common formats
            if 'date' in df.columns:
                df['date'] = pd.to_datetime(df['date'])
                df = df.set_index('date')
            elif 'timestamp' in df.columns:
                df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
                df = df.set_index('timestamp')

            print(f"API data downloaded successfully")
            print(f" Date range: {df.index[0]} ~ {df.index[-1]}")
            print(f" Number of stocks: {df.shape[1]}")
            return df
        except Exception as e:
            # Truncate the payload: a full price history would flood the message
            raise ValueError(
                f"Data conversion failed: {e}\n"
                f" API response: {str(data)[:200]}...\n"
                f" Please verify the data format documented by the API"
            ) from e
# Usage
import os
try:
    # Read the API key from an environment variable (recommended)
    api_key = os.environ.get('CRYPTO_API_KEY')
    if not api_key:
        print("Warning: CRYPTO_API_KEY environment variable is not set")
        print(" export CRYPTO_API_KEY='your_api_key'")

    market = APIMarket(
        api_url='https://api.example.com',
        api_key=api_key,
        max_retries=3
    )
    close = market.get_price('close')
    print("Market data fetched successfully")
except (ConnectionError, TimeoutError, PermissionError) as e:
    # Built-in exception types raised by APIMarket.get_price()
    print(f"\n{e}")
    print("\nPlease fix the issue and retry")
except Exception as e:
    print(f"\nUnexpected error: {e}")
Error 4: Backtest Execution Failure
Symptom: Market object is created successfully, but the backtest raises an error.
market = CryptoMarket()
position = close > close.average(20)
report = sim(position, market=market, resample='M')
# ValueError: Position index does not match price index
Causes:
- position and market.get_price() have mismatched date ranges
- position and the price data have mismatched ticker symbols
- Market object methods are not implemented correctly
Solution: validate compatibility before running the backtest
def validate_before_backtest(position, market):
    """Validate compatibility between position and market before backtesting.

    Args:
        position: Table of positions (index: dates, columns: ticker symbols).
        market: Object exposing get_name(), get_freq() and get_price().

    Returns:
        None. Progress and warnings are printed.

    Raises:
        RuntimeError: market.get_price('close') failed.
        ValueError: position and prices share no dates or no tickers
            (including when either index is empty).
        AttributeError: the market object lacks a required method.
    """
    print("=== Pre-backtest validation ===\n")

    # 1. Fetch market prices
    try:
        close = market.get_price('close')
    except Exception as e:
        # "from e" keeps the original failure attached for debugging
        raise RuntimeError(f"Unable to fetch market prices: {e}") from e

    # 2. Check date ranges
    position_dates = position.index
    price_dates = close.index
    no_overlap_message = (
        f"Position and Price share no overlapping dates\n"
        f" Please ensure the data sources are consistent"
    )
    # Empty indexes cannot overlap; fail here instead of with an IndexError
    # from the [0] / [-1] lookups below
    if len(position_dates) == 0 or len(price_dates) == 0:
        raise ValueError(no_overlap_message)

    print(f"Position date range: {position_dates[0]} ~ {position_dates[-1]}")
    print(f"Price date range: {price_dates[0]} ~ {price_dates[-1]}")

    # Intersection
    common_dates = position_dates.intersection(price_dates)
    if len(common_dates) == 0:
        raise ValueError(no_overlap_message)
    if len(common_dates) < len(position_dates) * 0.8:
        print(f"Warning: only {len(common_dates) / len(position_dates):.1%} of position dates have matching prices")
        print(" Some trading days may be ignored")

    # 3. Check ticker symbols
    position_stocks = position.columns.tolist()
    price_stocks = close.columns.tolist()
    print(f"\nPosition stock count: {len(position_stocks)}")
    print(f"Price stock count: {len(price_stocks)}")

    # Intersection
    common_stocks = set(position_stocks) & set(price_stocks)
    if not common_stocks:
        raise ValueError(
            f"Position and Price share no overlapping tickers\n"
            f" Position sample: {position_stocks[:5]}\n"
            f" Price sample: {price_stocks[:5]}\n"
            f" Please ensure the ticker symbol formats match"
        )
    missing_stocks = set(position_stocks) - set(price_stocks)
    if missing_stocks:
        print(f"Warning: {len(missing_stocks)} tickers are not present in Price:")
        print(f" {list(missing_stocks)[:10]}...")
        print(" These tickers will be ignored")

    # 4. Check market object methods
    required_methods = ['get_name', 'get_freq', 'get_price']
    for method in required_methods:
        if not hasattr(market, method):
            raise AttributeError(
                f"Market object is missing a required method: {method}\n"
                f" Please ensure the Market class is subclassed and all methods are implemented"
            )

    print(f"\nValidation passed: {len(common_stocks)} stocks, {len(common_dates)} trading days")
    print("=" * 50)
# Usage
try:
    market = CryptoMarket()
    close = market.get_price('close')
    position = close > close.rolling(20).mean()

    # Run validation
    validate_before_backtest(position, market)

    # Run backtest
    report = sim(position, market=market, resample='M', upload=False)
    print("\nBacktest succeeded")
    report.display()
except Exception as e:
    # Top-level boundary of the example: report the failure and stop
    print(f"\nBacktest failed: {e}")
    print("Please fix the issue and retry")
Debugging Tips
1. Step-by-Step Market Object Testing
# Step 1: Test basic methods
market = CryptoMarket()
print(f"Market name: {market.get_name()}")
print(f"Data frequency: {market.get_freq()}")
# Step 2: Test price loading
close = market.get_price('close')
print(f"\nClose price data:\n{close.head()}")
# Step 3: Test benchmark index
benchmark = market.get_benchmark()
print(f"\nBenchmark index:\n{benchmark.head()}")
# Step 4: Run simple strategy
position = close > close.rolling(20).mean()
print(f"\nPosition signals:\n{position.tail()}")
# Step 5: Small-scale backtest
position_small = position.iloc[-100:] # Only last 100 days
report = sim(position_small, market=market, resample='M', upload=False)
report.display()
2. Wrap Key Steps with try-except
class SafeMarket(Market):
    """Market whose get_price() reports failures before re-raising them."""

    def get_price(self, trade_at_price='close', adj=True):
        """Load data_{trade_at_price}.csv, printing a diagnostic on failure."""
        try:
            # Main logic
            df = pd.read_csv(f'data_{trade_at_price}.csv', index_col=0, parse_dates=True)
            return df
        except FileNotFoundError as e:
            print(f"File not found: {e}")
            raise  # Re-raise unchanged so callers still see the real error
        except Exception as e:
            print(f"Unexpected error: {e}")
            print(f" Error type: {type(e).__name__}")
            raise
3. Detailed Logging
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LoggedMarket(Market):
    """Market whose get_price() logs each load attempt and its outcome."""

    def get_price(self, trade_at_price='close', adj=True):
        """Load data_{trade_at_price}.csv, logging success or failure."""
        logger.info(f"Loading price data: {trade_at_price}")
        try:
            df = pd.read_csv(f'data_{trade_at_price}.csv', index_col=0, parse_dates=True)
            logger.info(f"Data loaded successfully: {df.shape}")
            return df
        except Exception as e:
            # exc_info=True attaches the traceback to the log record
            logger.error(f"Data loading failed: {e}", exc_info=True)
            raise