diff --git a/lumibot/tools/polygon_helper.py b/lumibot/tools/polygon_helper.py index 3325a04c1..e27f95a42 100644 --- a/lumibot/tools/polygon_helper.py +++ b/lumibot/tools/polygon_helper.py @@ -411,6 +411,17 @@ def get_missing_dates(df_all, asset, start, end): dates = pd.Series(df_all.index.date).unique() missing_dates = sorted(set(trading_dates) - set(dates)) + # Find any dates with nan values in the df_all DataFrame. This happens for some infrequently traded assets, but + # it is difficult to know if the data is actually missing or if it is just infrequent trading, query for it again. + missing_dates += df_all[df_all.isnull().all(axis=1)].index.date.tolist() + + # make sure the dates are unique + missing_dates = list(set(missing_dates)) + missing_dates.sort() + + # finally, filter out any dates that are not in start/end range (inclusive) + missing_dates = [d for d in missing_dates if start.date() <= d <= end.date()] + return missing_dates diff --git a/tests/test_polygon_helper.py b/tests/test_polygon_helper.py index 8d1387f39..7af65bc1c 100644 --- a/tests/test_polygon_helper.py +++ b/tests/test_polygon_helper.py @@ -480,10 +480,5 @@ def test_polygon_missing_day_caching(self, mocker, tmpdir, timespan, force_cache assert mock_polyclient.create().get_aggs.call_count == 3 assert expected_cachefile.exists() assert len(df) == 7 - df = ph.get_price_data_from_polygon(api_key, asset, start_date, end_date, timespan, force_cache_update=force_cache_update) - assert len(df) == 7 - if force_cache_update: - assert mock_polyclient.create().get_aggs.call_count == 2 * 3 - else: - assert mock_polyclient.create().get_aggs.call_count == 3 + expected_cachefile.unlink()