NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts match the headers,
but context whitespace is a best effort; verify against the target tree or
apply with `git apply --ignore-whitespace`. The `index` hashes are those of
the original patch and no longer describe the edited '+' lines.

diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py
index 12caabf0b..e51d83b08 100644
--- a/python/prophet/forecaster.py
+++ b/python/prophet/forecaster.py
@@ -25,6 +25,7 @@ logger.setLevel(logging.INFO)
 
 NANOSECONDS_TO_SECONDS = 1000 * 1000 * 1000
 
+
 class Prophet(object):
     """Prophet forecaster.
 
@@ -77,6 +78,7 @@ class Prophet(object):
     stan_backend: str as defined in StanBackendEnum default: None - will try to
         iterate over all available backends and find the working one
     holidays_mode: 'additive' or 'multiplicative'. Defaults to seasonality_mode.
+    negative_prediction_values: bool. If False, trend, yhat and their intervals are clipped at 0. Defaults to True.
     """
 
     def __init__(
@@ -99,6 +101,7 @@ def __init__(
             stan_backend=None,
             scaling: str = 'absmax',
             holidays_mode=None,
+            negative_prediction_values: bool = True,
     ):
         self.growth = growth
 
@@ -150,6 +153,7 @@ def __init__(
         self.train_component_cols = None
         self.component_modes = None
         self.train_holiday_names = None
+        self.negative_prediction_values = negative_prediction_values
         self.fit_kwargs = {}
         self.validate_inputs()
         self._load_stan_backend(stan_backend)
@@ -1185,6 +1189,37 @@ def calculate_initial_params(self, num_total_regressors: int) -> ModelParams:
             sigma_obs=1.0,
         )
 
+    def calculate_and_clip_percentile(self, data, component, comp, lower_p, upper_p):
+        """Store lower/upper percentile bounds for one forecast component.
+
+        Computes the lower and upper percentiles of `comp` along axis 1 and
+        writes them to `data`. When negative_prediction_values is False the
+        bounds are clipped at 0 before being stored.
+
+        Parameters
+        ----------
+        data: dict that receives the component + '_lower' and
+            component + '_upper' entries; modified in place.
+        component: str used as the key prefix.
+        comp: array-like of values; percentiles are taken along axis 1.
+        lower_p: float percentile for the lower bound.
+        upper_p: float percentile for the upper bound.
+
+        Returns
+        -------
+        None
+        """
+        lower = self.percentile(comp, lower_p, axis=1)
+        upper = self.percentile(comp, upper_p, axis=1)
+
+        if not self.negative_prediction_values:
+            # Negative bounds are not allowed, so floor them at zero.
+            lower = np.clip(lower, a_min=0, a_max=None)
+            upper = np.clip(upper, a_min=0, a_max=None)
+
+        data[component + '_lower'] = lower
+        data[component + '_upper'] = upper
+
     def fit(self, df, **kwargs):
         """Fit the Prophet model.
 
@@ -1282,12 +1317,19 @@ def predict(self, df: pd.DataFrame = None, vectorized: bool = True) -> pd.DataFr
             cols.append('cap')
         if self.logistic_floor:
             cols.append('floor')
+        if not self.negative_prediction_values:
+            df['trend'] = df['trend'].clip(lower=0)
+
         # Add in forecast components
         df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
         df2['yhat'] = (
                 df2['trend'] * (1 + df2['multiplicative_terms'])
                 + df2['additive_terms']
         )
+
+        if not self.negative_prediction_values:
+            df2['yhat'] = df2['yhat'].clip(lower=0)
+
         return df2
 
     @staticmethod
@@ -1415,7 +1457,9 @@ def predict_seasonal_components(self, df):
             comp = np.matmul(X, beta_c.transpose())
             if component in self.component_modes['additive']:
                 comp *= self.y_scale
+
             data[component] = np.nanmean(comp, axis=1)
+
             if self.uncertainty_samples:
                 data[component + '_lower'] = self.percentile(
                     comp, lower_p, axis=1,
@@ -1423,6 +1467,7 @@ def predict_seasonal_components(self, df):
                 data[component + '_upper'] = self.percentile(
                     comp, upper_p, axis=1,
                 )
+
         return pd.DataFrame(data)
 
     def predict_uncertainty(self, df: pd.DataFrame, vectorized: bool) -> pd.DataFrame:
@@ -1444,10 +1489,9 @@ def predict_uncertainty(self, df: pd.DataFrame, vectorized: bool) -> pd.DataFram
 
         series = {}
         for key in ['yhat', 'trend']:
-            series['{}_lower'.format(key)] = self.percentile(
-                sim_values[key], lower_p, axis=1)
-            series['{}_upper'.format(key)] = self.percentile(
-                sim_values[key], upper_p, axis=1)
+            self.calculate_and_clip_percentile(
+                series, key, sim_values[key], lower_p, upper_p
+            )
 
         return pd.DataFrame(series)
 
diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py
index 3df052d94..10b9b809b 100644
--- a/python/prophet/tests/test_prophet.py
+++ b/python/prophet/tests/test_prophet.py
@@ -255,6 +255,7 @@ def test_make_future_dataframe_include_history(self, daily_univariate_ts, backen
 
         assert len(future) == train.shape[0] + 3
 
+
 class TestProphetTrendComponent:
     def test_invalid_growth_input(self, backend):
         msg = 'Parameter "growth" should be "linear", ' '"logistic" or "flat".'
@@ -433,6 +434,16 @@ def test_override_n_changepoints(self, daily_univariate_ts, backend):
         cp = m.changepoints_t
         assert cp.shape[0] == 15
 
+    def test_without_negative_predictions(self, subdaily_univariate_ts, backend):
+        test_days = 280
+        train, _ = train_test_split(subdaily_univariate_ts, test_days)
+        forecaster = Prophet(stan_backend=backend, negative_prediction_values=False, weekly_seasonality=True, yearly_seasonality=True)
+        forecaster.fit(train, seed=1237861298)
+        np.random.seed(876543987)
+        future = forecaster.make_future_dataframe(test_days, include_history=False)
+        fcst = forecaster.predict(future)
+        assert (fcst.filter(regex='^(yhat|trend)').values >= 0).all()
+
 
 class TestProphetSeasonalComponent:
     def test_fourier_series_weekly(self, daily_univariate_ts):
@@ -877,7 +888,6 @@ def test_subdaily_holidays(self, subdaily_univariate_ts, backend):
         assert sum(fcst["special_day"] == 0) == 575
 
 
-
 class TestProphetRegressors:
     def test_added_regressors(self, daily_univariate_ts, backend):
         m = Prophet(stan_backend=backend)