Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed bugs in pandas_data and data.py which didnt correctly handle long lengths in get_historical_prices #661

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
43a833c
fix for bug in data.py for polygon data
brettelliot Nov 28, 2024
8835f18
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 9, 2024
f05b327
Make test deterministic
brettelliot Dec 9, 2024
7525392
Added deterministic tests for thanksgiving dates.
brettelliot Dec 9, 2024
36bbf9e
make sure tests that require polygon subs are properly skipped
brettelliot Dec 10, 2024
deea225
Fixed bug in get_start_datetime_and_ts_unit; wasn't getting enough pa…
brettelliot Dec 11, 2024
e4ea8a1
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 12, 2024
1a230d0
set the startdate far back enough to get N bars but not too far back
brettelliot Dec 12, 2024
e6393f6
Fix one of the polygon tests in test_polygon
brettelliot Dec 12, 2024
b4cf8e1
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 16, 2024
99b064b
Change warning to debug message
brettelliot Dec 17, 2024
4e3f0fd
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 18, 2024
1f73603
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 18, 2024
354e004
fix division by zero bug when shorting
brettelliot Dec 19, 2024
86a6f78
better tests for figuring out the get_historical_prices problem
brettelliot Dec 19, 2024
568f877
tests for backtest broker getting data in the future; fix for yahoo d…
brettelliot Dec 20, 2024
03db9c1
Fixed bug so that covering short positions works in drift rebalancer.
brettelliot Jan 2, 2025
2bfe2c9
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Jan 2, 2025
29bb725
remove warning that wasn't true. If the drift is 1.0 or -1.0 we will …
brettelliot Jan 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions lumibot/data_sources/pandas_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,11 @@ def get_start_datetime_and_ts_unit(self, length, timestep, start_dt=None, start_
# Convert timestep string to timedelta and get start datetime
td, ts_unit = self.convert_timestep_str_to_timedelta(timestep)

# Multiply td by length to get the end datetime
td *= length
if ts_unit == "day":
# Multiply td * length * 1.5 to get the end datetime with overflow + 3 days for long weekends
td = (td * length * 1.5) + timedelta(days=3)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

category Functionality

Inflexible Date Buffer for Market Closures

Tell me more

What is the issue?
The hardcoded overflow factor of 1.5 and additional 3 days buffer may not be sufficient for all scenarios, particularly during periods with multiple holidays or market closures.

Why this matters
During periods with multiple holidays (e.g., Christmas to New Year's) or unexpected market closures, the calculation could still result in insufficient data being retrieved, causing potential data gaps or incorrect analysis.

Chat with Korbit by mentioning @korbit-ai, and give a 👍 or 👎 to help Korbit improve your reviews.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1.5 multiplier can be large if someone is running a large backtest of like 2 years. How about instead using a round weeks check?

weeks_requested = td // 5  # Full trading week is 5 days
extra_padding_days = weeks_requested * 3  # to account for 3day weekends
td += extra_padding_days

else:
td *= length

if start_dt is not None:
start_datetime = start_dt - td
Expand Down
5 changes: 5 additions & 0 deletions lumibot/entities/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,11 @@ def _get_bars_dict(self, dt, length=1, timestep=None, timeshift=0):

# Get bars.
end_row = self.get_iter_count(dt) - timeshift
if self.df.index[end_row] != dt:
# If dt is not in the dataframe, get_iter_count will return the last bar before dt.
# Since the data is not complete, we need to get the last bar, which is the end_row.
# And since the selection at the end is exclusive of end_row, we need to add 1 to end_row here.
end_row += 1
start_row = end_row - length

if start_row < 0:
Expand Down
14 changes: 0 additions & 14 deletions lumibot/tools/polygon_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,20 +411,6 @@ def get_missing_dates(df_all, asset, start, end):
dates = pd.Series(df_all.index.date).unique()
missing_dates = sorted(set(trading_dates) - set(dates))

# TODO: This code works AFAIK, But when i enable it the tests for "test_polygon_missing_day_caching" and
# i don't know why nor how to fix this code or the tests. So im leaving it disabled for now. If you have problems
# with NANs in cached polygon data, you can try to enable this code and fix the tests.

# # Find any dates with nan values in the df_all DataFrame
# missing_dates += df_all[df_all.isnull().all(axis=1)].index.date.tolist()
#
# # make sure the dates are unique
# missing_dates = list(set(missing_dates))
# missing_dates.sort()
#
# # finally, filter out any dates that are not in start/end range (inclusive)
# missing_dates = [d for d in missing_dates if start.date() <= d <= end.date()]

return missing_dates


Expand Down
15 changes: 10 additions & 5 deletions tests/backtest/test_example_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

# Global parameters
# API Key for testing Polygon.io
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
POLYGON_IS_PAID_SUBSCRIPTION = os.getenv("POLYGON_IS_PAID_SUBSCRIPTION", "true").lower() not in {'false', '0', 'f', 'n', 'no'}

from lumibot.credentials import POLYGON_CONFIG

class TestExampleStrategies:
def test_stock_bracket(self):
Expand Down Expand Up @@ -208,7 +206,14 @@ def test_limit_and_trailing_stops(self):
assert round(results["total_return"] * 100, 1) >= 0.7
assert round(results["max_drawdown"]["drawdown"] * 100, 1) <= 0.2

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
def test_options_hold_to_expiry(self):
"""
Test the example strategy OptionsHoldToExpiry by running a backtest and checking that the strategy object is
Expand All @@ -227,7 +232,7 @@ def test_options_hold_to_expiry(self):
show_plot=False,
show_tearsheet=False,
save_tearsheet=False,
polygon_api_key=POLYGON_API_KEY,
polygon_api_key=POLYGON_CONFIG["API_KEY"],
)

trades_df = strat_obj.broker._trade_event_log_df
Expand Down
76 changes: 65 additions & 11 deletions tests/backtest/test_polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
from datetime import timedelta

# Global parameters
# API Key for testing Polygon.io
from lumibot.credentials import POLYGON_API_KEY
from lumibot.credentials import POLYGON_CONFIG


class PolygonBacktestStrat(Strategy):
Expand Down Expand Up @@ -204,7 +203,18 @@ def verify_backtest_results(self, poly_strat_obj):
)
assert "fill" not in poly_strat_obj.order_time_tracker[stoploss_order_id]

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_restclient(self):
"""
Test Polygon REST Client with Lumibot Backtesting and real API calls to Polygon. Using the Amazon stock
Expand All @@ -219,7 +229,7 @@ def test_polygon_restclient(self):
data_source = PolygonDataBacktesting(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
api_key=POLYGON_API_KEY,
api_key=POLYGON_CONFIG['API_KEY'],
)
broker = BacktestingBroker(data_source=data_source)
poly_strat_obj = PolygonBacktestStrat(
Expand All @@ -232,7 +242,18 @@ def test_polygon_restclient(self):
assert results
self.verify_backtest_results(poly_strat_obj)

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_intraday_daterange(self):
tzinfo = pytz.timezone("America/New_York")
backtesting_start = datetime.datetime(2024, 2, 7).astimezone(tzinfo)
Expand All @@ -241,7 +262,7 @@ def test_intraday_daterange(self):
data_source = PolygonDataBacktesting(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
api_key=POLYGON_API_KEY,
api_key=POLYGON_CONFIG['API_KEY'],
)
broker = BacktestingBroker(data_source=data_source)
poly_strat_obj = PolygonBacktestStrat(
Expand All @@ -256,7 +277,18 @@ def test_intraday_daterange(self):
# Assert the end datetime is before the market open of the next trading day.
assert broker.datetime == datetime.datetime.fromisoformat("2024-02-12 08:30:00-05:00")

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_legacy_backtest(self):
"""
Do the same backtest as test_polygon_restclient() but using the legacy backtest() function call instead of
Expand All @@ -283,7 +315,18 @@ def test_polygon_legacy_backtest(self):
assert results
self.verify_backtest_results(poly_strat_obj)

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_legacy_backtest2(self):
"""Test that the legacy backtest() function call works without returning the startegy object"""
# Parameters: True = Live Trading | False = Backtest
Expand All @@ -300,7 +343,7 @@ def test_polygon_legacy_backtest2(self):
show_plot=False,
show_tearsheet=False,
save_tearsheet=False,
polygon_api_key=POLYGON_API_KEY, # Testing the legacy parameter name while DeprecationWarning is active
polygon_api_key=POLYGON_CONFIG['API_KEY'], # Testing the legacy parameter name while DeprecationWarning is active
)
assert results

Expand Down Expand Up @@ -349,14 +392,25 @@ def test_pull_source_symbol_bars_with_api_call(self, polygon_data_backtesting, m

class TestPolygonDataSource:

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_get_historical_prices(self):
tzinfo = pytz.timezone("America/New_York")
start = datetime.datetime(2024, 2, 5).astimezone(tzinfo)
end = datetime.datetime(2024, 2, 10).astimezone(tzinfo)

data_source = PolygonDataBacktesting(
start, end, api_key=POLYGON_API_KEY
start, end, api_key=POLYGON_CONFIG['API_KEY']
)
data_source._datetime = datetime.datetime(2024, 2, 7, 10).astimezone(tzinfo)
# This call will set make the data source use minute bars.
Expand Down
113 changes: 97 additions & 16 deletions tests/test_get_historical_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@
from lumibot.tools import get_trading_days

# Global parameters
# API Key for testing Polygon.io
from lumibot.credentials import POLYGON_API_KEY
from lumibot.credentials import TRADIER_CONFIG, ALPACA_CONFIG
from lumibot.credentials import TRADIER_CONFIG, ALPACA_CONFIG, POLYGON_CONFIG


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -59,7 +57,20 @@ class TestDatasourceBacktestingGetHistoricalPricesDailyData:
@classmethod
def setup_class(cls):
pass


# noinspection PyMethodMayBeStatic
def get_first_trading_day_after_thanksgiving(self, year):
# Thanksgiving is the fourth Thursday in November
thanksgiving = datetime(year, 11, 1) + timedelta(days=(3 - datetime(year, 11, 1).weekday() + 28) % 7 + 21)
# The first trading day after Thanksgiving is the next business day
first_trading_day = thanksgiving + timedelta(days=1)

# Check if the first trading day is a weekend and adjust accordingly
if first_trading_day.weekday() >= 5: # 5 = Saturday, 6 = Sunday
first_trading_day += timedelta(days=(7 - first_trading_day.weekday()))

return first_trading_day

# noinspection PyMethodMayBeStatic
def check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
self, bars: Bars,
Expand Down Expand Up @@ -117,6 +128,13 @@ def check_dividends_and_adjusted_returns(self, bars):
rtol=0
)

def test_get_first_trading_day_after_thanksgiving(self):
first_trading_day_after_thanksgiving = self.get_first_trading_day_after_thanksgiving(2019)
assert first_trading_day_after_thanksgiving == datetime(2019, 11, 29)

first_trading_day_after_thanksgiving = self.get_first_trading_day_after_thanksgiving(2023)
assert first_trading_day_after_thanksgiving == datetime(2023, 11, 24)

def test_pandas_backtesting_data_source_get_historical_prices_daily_bars(self, pandas_data_fixture):
"""
This tests that the pandas data_source calculates adjusted returns for bars and that they
Expand All @@ -131,20 +149,65 @@ def test_pandas_backtesting_data_source_get_historical_prices_daily_bars(self, p
)
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(bars, backtesting_start=backtesting_start)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)
self.check_dividends_and_adjusted_returns(bars)

@pytest.mark.skip(reason="This test exposes a possible bug in data.py that we have not investigated yet.")
@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
# First trading day after Thanksgiving test
backtesting_start = datetime(2019, 11, 2)
backtesting_end = self.get_first_trading_day_after_thanksgiving(2019)
data_source = PandasData(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
pandas_data=pandas_data_fixture
)
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)

@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_backtesting_data_source_get_historical_prices_daily_bars(self):
backtesting_end = datetime.now() - timedelta(days=1)
backtesting_start = backtesting_end - timedelta(days=self.length * 2 + 5)
last_year = datetime.now().year - 1
backtesting_start = datetime(last_year, 3, 25)
backtesting_end = datetime(last_year, 4, 25)
data_source = PolygonDataBacktesting(
backtesting_start, backtesting_end, api_key=POLYGON_API_KEY
backtesting_start, backtesting_end, api_key=POLYGON_CONFIG["API_KEY"]
)
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(bars, backtesting_start=backtesting_start)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)

# First trading day after Thanksgiving test
backtesting_start = datetime(last_year, 11, 2)
backtesting_end = self.get_first_trading_day_after_thanksgiving(last_year)
data_source = PolygonDataBacktesting(
backtesting_start, backtesting_end, api_key=POLYGON_CONFIG["API_KEY"]
)
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)

def test_yahoo_backtesting_data_source_get_historical_prices_daily_bars(self, pandas_data_fixture):
"""
Expand All @@ -161,10 +224,27 @@ def test_yahoo_backtesting_data_source_get_historical_prices_daily_bars(self, pa
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_dividends_and_adjusted_returns(bars)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(bars, backtesting_start=backtesting_start)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)

# First trading day after Thanksgiving test
backtesting_start = datetime(2019, 11, 2)
backtesting_end = self.get_first_trading_day_after_thanksgiving(2019)
data_source = YahooDataBacktesting(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
pandas_data=pandas_data_fixture
)
bars = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
check_bars(bars=bars, length=self.length)
self.check_date_of_last_bar_is_date_of_last_trading_date_before_backtest_start(
bars,
backtesting_start=backtesting_start
)


# @pytest.mark.skip()
class TestDatasourceGetHistoricalPricesDailyData:
"""These tests check the daily Bars returned from get_historical_prices for live data sources."""

Expand Down Expand Up @@ -202,8 +282,10 @@ def check_date_of_last_bar_is_correct_for_live_data_sources(self, bars):
# if it's not a trading day, the last bar the bar should from the last trading day
assert bars.df.index[-1].date() == self.trading_days.index[-1].date()

# @pytest.mark.skip()
@pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key")
@pytest.mark.skipif(
not ALPACA_CONFIG['API_KEY'],
reason="This test requires an alpaca API key"
)
@pytest.mark.skipif(
ALPACA_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires an alpaca API key"
Expand All @@ -225,7 +307,6 @@ def test_alpaca_data_source_get_historical_prices_daily_bars(self):
check_bars(bars=bars, length=1, check_timezone=False)
self.check_date_of_last_bar_is_correct_for_live_data_sources(bars)

# @pytest.mark.skip()
@pytest.mark.skipif(not TRADIER_CONFIG['ACCESS_TOKEN'], reason="No Tradier credentials provided.")
def test_tradier_data_source_get_historical_prices_daily_bars(self):
data_source = TradierData(
Expand Down
Loading
Loading