diff --git a/tests/test_ops_curve_shift.py b/tests/test_ops_curve_shift.py index da62d4c..72e420e 100644 --- a/tests/test_ops_curve_shift.py +++ b/tests/test_ops_curve_shift.py @@ -6,14 +6,11 @@ import pytest from wind_up.ops_curve_shift import ( - CURVE_CONSTANTS, CurveConfig, CurveShiftInput, + CurveShiftOutput, CurveTypes, - calculate_pitch_curve_shift, - calculate_power_curve_shift, - calculate_rpm_curve_shift, - calculate_wind_speed_curve_shift, + calculate_curve_shift, check_for_ops_curve_shift, ) @@ -23,7 +20,7 @@ def fake_power_curve_df() -> pd.DataFrame: return pd.DataFrame( { "wind_speed": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], - "power": [0, 0, 0, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40], + "power": [0, 0, np.nan, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40], } ).set_index("power") @@ -100,27 +97,23 @@ def test_missing_column_in_post_df(self, column_name: str, fake_power_curve_df: @pytest.mark.parametrize( ("shift_amount", "expected"), [ - pytest.param(2.0, -0.22099447513812154, id="shift DOES exceed threshold"), - pytest.param(0.05, -0.007042253521126751, id="shift DOES NOT exceed threshold"), + pytest.param(2.0, -0.21557719054241997, id="shift DOES exceed threshold"), + pytest.param(0.05, -0.006954837573730166, id="shift DOES NOT exceed threshold"), ], ) -def test_calculate_power_curve_shift( - shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture -) -> None: - with caplog.at_level(logging.WARNING): - actual = calculate_power_curve_shift( - turbine_name="anything", - pre_df=fake_power_curve_df.reset_index(), - post_df=(fake_power_curve_df + shift_amount).reset_index(), - x_col="wind_speed", - y_col="power", - ) - - if abs(expected) > CURVE_CONSTANTS[CurveTypes.POWER_CURVE.value]["warning_threshold"]: - assert "Ops Curve Shift warning" in caplog.text - assert f": {actual:.3f}" in caplog.text # check the actual value (including its +/- sign) is in the log message +def test_calculate_power_curve_shift(shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame) -> None: + curve_shift_input = CurveShiftInput( + turbine_name="anything", + pre_df=fake_power_curve_df.reset_index(), + post_df=(fake_power_curve_df + shift_amount).reset_index(), + curve_config=CurveConfig(name=CurveTypes.POWER_CURVE, x_col="wind_speed", y_col="power"), + ) + # check that CurveShiftInput pydantic model has removed NaNs + assert not curve_shift_input.pre_df.isna().to_numpy().any() + assert not curve_shift_input.post_df.isna().to_numpy().any() + actual = calculate_curve_shift(curve_shift_input=curve_shift_input) - np.testing.assert_almost_equal(actual=actual, desired=expected) + np.testing.assert_almost_equal(actual=actual.value, desired=expected) @pytest.mark.parametrize( @@ -134,18 +127,16 @@ def test_calculate_rpm_curve_shift( shift_amount: float, expected: float, fake_gen_rpm_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture ) -> None: with caplog.at_level(logging.WARNING): - actual = calculate_rpm_curve_shift( - turbine_name="anything", - pre_df=fake_gen_rpm_curve_df.reset_index(), - post_df=(fake_gen_rpm_curve_df + shift_amount).reset_index(), - x_col="wind_speed", - y_col="gen_rpm", + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name="anything", + pre_df=fake_gen_rpm_curve_df.reset_index(), + post_df=(fake_gen_rpm_curve_df + shift_amount).reset_index(), + curve_config=CurveConfig(name=CurveTypes.RPM, x_col="wind_speed", y_col="gen_rpm"), + ) ) - if abs(expected) > CURVE_CONSTANTS[CurveTypes.RPM.value]["warning_threshold"]: - assert "Ops Curve Shift warning" in caplog.text - - np.testing.assert_almost_equal(actual=actual, desired=expected) + np.testing.assert_almost_equal(actual=actual.value, desired=expected) @pytest.mark.parametrize( @@ -155,47 +146,39 @@ def test_calculate_rpm_curve_shift( pytest.param(0.13, -0.09533333333333438, id="shift DOES NOT exceed threshold"), ], ) -def test_calculate_pitch_curve_shift( - shift_amount: float, expected: float, fake_pitch_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture -) -> None: - with caplog.at_level(logging.WARNING): - actual = calculate_pitch_curve_shift( +def test_calculate_pitch_curve_shift(shift_amount: float, expected: float, fake_pitch_curve_df: pd.DataFrame) -> None: + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( turbine_name="anything", pre_df=fake_pitch_curve_df.reset_index(), post_df=(fake_pitch_curve_df + shift_amount).reset_index(), - x_col="wind_speed", - y_col="pitch", + curve_config=CurveConfig(name=CurveTypes.PITCH, x_col="wind_speed", y_col="pitch"), ) + ) - if abs(expected) > CURVE_CONSTANTS[CurveTypes.PITCH.value]["warning_threshold"]: - assert "Ops Curve Shift warning" in caplog.text - - np.testing.assert_almost_equal(actual=actual, desired=expected) + np.testing.assert_almost_equal(actual=actual.value, desired=expected) @pytest.mark.parametrize( ("shift_amount", "expected"), [ - pytest.param(2.0, 0.21621621621621623, id="shift DOES exceed threshold"), - pytest.param(0.05, -0.04729729729729748, id="shift DOES NOT exceed threshold"), + pytest.param(2.0, 0.21296296296296302, id="shift DOES exceed threshold"), + pytest.param(0.05, -0.03981481481481486, id="shift DOES NOT exceed threshold"), ], ) def test_calculate_wind_speed_curve_shift( - shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture + shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame ) -> None: - with caplog.at_level(logging.WARNING): - actual = calculate_wind_speed_curve_shift( + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( turbine_name="anything", pre_df=fake_power_curve_df.reset_index(), post_df=(fake_power_curve_df + shift_amount).reset_index(), - x_col="power", - y_col="wind_speed", + curve_config=CurveConfig(name=CurveTypes.WIND_SPEED, x_col="power", y_col="wind_speed"), ) + ) - if abs(expected) > CURVE_CONSTANTS[CurveTypes.WIND_SPEED.value]["warning_threshold"]: - assert "Ops Curve Shift warning" in caplog.text - - np.testing.assert_almost_equal(actual=actual, desired=expected) + np.testing.assert_almost_equal(actual=actual.value, desired=expected) class TestCheckForOpsCurveShift: @@ -269,10 +252,10 @@ def test_calls_funcs_as_intended( wtg_name = "anything" with ( - patch("wind_up.ops_curve_shift.calculate_power_curve_shift", return_value=np.nan) as mock_power, - patch("wind_up.ops_curve_shift.calculate_rpm_curve_shift", return_value=np.nan) as mock_rpm, - patch("wind_up.ops_curve_shift.calculate_pitch_curve_shift", return_value=np.nan) as mock_pitch, - patch("wind_up.ops_curve_shift.calculate_wind_speed_curve_shift", return_value=np.nan) as mock_ws, + patch( + "wind_up.ops_curve_shift.calculate_curve_shift", + return_value=CurveShiftOutput(value=np.nan, warning_msg=None), + ) as mock_curve_shift, patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func, ): mock_wind_up_conf = Mock() @@ -291,19 +274,40 @@ def test_calls_funcs_as_intended( plot_cfg=mock_plot_conf, ) - mock_power.assert_called_once_with( - turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="power" + # define expected call inputs + curve_input_power = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + curve_config=CurveConfig(name=CurveTypes.POWER_CURVE, x_col="wind_speed", y_col="power"), ) - - mock_rpm.assert_called_once_with(turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="power", y_col="gen_rpm") - - mock_pitch.assert_called_once_with( - turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="pitch" + curve_input_rpm = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + curve_config=CurveConfig(name=CurveTypes.RPM, x_col="power", y_col="gen_rpm"), ) - - mock_ws.assert_called_once_with( - turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="power", y_col="wind_speed" + curve_input_pitch = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + curve_config=CurveConfig(name=CurveTypes.PITCH, x_col="wind_speed", y_col="pitch"), ) + curve_input_wind_speed = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + curve_config=CurveConfig(name=CurveTypes.WIND_SPEED, x_col="power", y_col="wind_speed"), + ) + _call_inputs_list = [curve_input_power, curve_input_rpm, curve_input_pitch, curve_input_wind_speed] + + # check calls are made with expected inputs + for _call, _input in zip(mock_curve_shift.mock_calls, _call_inputs_list): + pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].pre_df, _input.pre_df) + pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].post_df, _input.post_df) + assert _call.kwargs["curve_shift_input"].model_dump(exclude=["pre_df", "post_df"]) == _input.model_dump( + exclude=["pre_df", "post_df"] + ) mock_plot_func.assert_called_once_with( pre_df=_df, diff --git a/wind_up/ops_curve_shift.py b/wind_up/ops_curve_shift.py index 5c7dd32..6074794 100644 --- a/wind_up/ops_curve_shift.py +++ b/wind_up/ops_curve_shift.py @@ -21,6 +21,11 @@ class CurveTypes(str, Enum): WIND_SPEED = "windspeed" +class CurveShiftOutput(NamedTuple): + value: float + warning_msg: str | None + + CURVE_CONSTANTS = { CurveTypes.POWER_CURVE.value: {"warning_threshold": 0.01, "x_bin_width": 1}, CurveTypes.RPM.value: {"warning_threshold": 0.005, "x_bin_width": 0}, @@ -58,7 +63,7 @@ def validate_dataframes(self) -> CurveShiftInput: required_cols = {self.curve_config.x_col, self.curve_config.y_col} columns_missing_in_pre_df = required_cols - set(self.pre_df.columns) columns_missing_in_post_df = required_cols - set(self.post_df.columns) - if columns_missing_in_pre_df or columns_missing_in_post_df: + if (len(columns_missing_in_pre_df) > 0) or (len(columns_missing_in_post_df) > 0): err_msg = "Column name missing in dataframe" raise IndexError(err_msg) @@ -104,22 +109,55 @@ def check_for_ops_curve_shift( ): return results_dict - results_dict[f"{CurveTypes.POWER_CURVE.value}_shift"] = calculate_power_curve_shift( - turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=scada_ws_col, y_col=pw_col + shift_power = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name=wtg_name, + pre_df=pre_df, + post_df=post_df, + curve_config=CurveConfig(name=CurveTypes.POWER_CURVE, x_col=scada_ws_col, y_col=pw_col), + ) ) - results_dict[f"{CurveTypes.RPM.value}_shift"] = calculate_rpm_curve_shift( - turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=pw_col, y_col=rpm_col + shift_rpm = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name=wtg_name, + pre_df=pre_df, + post_df=post_df, + curve_config=CurveConfig(name=CurveTypes.RPM, x_col=pw_col, y_col=rpm_col), + ) ) - results_dict[f"{CurveTypes.PITCH.value}_shift"] = calculate_pitch_curve_shift( - turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=scada_ws_col, y_col=pt_col + shift_pitch = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name=wtg_name, + pre_df=pre_df, + post_df=post_df, + curve_config=CurveConfig(name=CurveTypes.PITCH, x_col=scada_ws_col, y_col=pt_col), + ) ) - results_dict[f"{CurveTypes.WIND_SPEED.value}_shift"] = calculate_wind_speed_curve_shift( - turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=pw_col, y_col=scada_ws_col + shift_wind_speed = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name=wtg_name, + pre_df=pre_df, + post_df=post_df, + curve_config=CurveConfig(name=CurveTypes.WIND_SPEED, x_col=pw_col, y_col=scada_ws_col), + ) ) + results_dict[f"{CurveTypes.POWER_CURVE.value}_shift"] = shift_power.value + results_dict[f"{CurveTypes.RPM.value}_shift"] = shift_rpm.value + results_dict[f"{CurveTypes.PITCH.value}_shift"] = shift_pitch.value + results_dict[f"{CurveTypes.WIND_SPEED.value}_shift"] = shift_wind_speed.value + + warning_msg = "" + for wm in [shift_power.warning_msg, shift_rpm.warning_msg, shift_pitch.warning_msg, shift_wind_speed.warning_msg]: + if wm is not None: + warning_msg += wm + + if warning_msg: + result_manager.warning(warning_msg) + if plot: compare_ops_curves_pre_post( pre_df=pre_df, @@ -137,54 +175,6 @@ def check_for_ops_curve_shift( return results_dict -def calculate_power_curve_shift( - turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str -) -> float: - curve_config = CurveConfig(name=CurveTypes.POWER_CURVE.value, x_col=x_col, y_col=y_col) - - curve_shift_input = CurveShiftInput( - turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config - ) - - return _calculate_curve_shift(curve_shift_input=curve_shift_input) - - -def calculate_rpm_curve_shift( - turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str -) -> float: - curve_config = CurveConfig(name=CurveTypes.RPM.value, x_col=x_col, y_col=y_col) - - curve_shift_input = CurveShiftInput( - turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config - ) - - return _calculate_curve_shift(curve_shift_input=curve_shift_input) - - -def calculate_pitch_curve_shift( - turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str -) -> float: - curve_config = CurveConfig(name=CurveTypes.PITCH.value, x_col=x_col, y_col=y_col) - - curve_shift_input = CurveShiftInput( - turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config - ) - - return _calculate_curve_shift(curve_shift_input=curve_shift_input) - - -def calculate_wind_speed_curve_shift( - turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str -) -> float: - curve_config = CurveConfig(name=CurveTypes.WIND_SPEED.value, x_col=x_col, y_col=y_col) - - curve_shift_input = CurveShiftInput( - turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config - ) - - return _calculate_curve_shift(curve_shift_input=curve_shift_input) - - def _required_cols_are_present( pre_df: pd.DataFrame, post_df: pd.DataFrame, turbine_name: str, required_ops_curve_columns: OpsCurveRequiredColumns ) -> bool: @@ -202,7 +192,7 @@ def _required_cols_are_present( return True -def _calculate_curve_shift(curve_shift_input: CurveShiftInput) -> float: +def calculate_curve_shift(curve_shift_input: CurveShiftInput) -> CurveShiftOutput: conf = curve_shift_input.curve_config pre_df = curve_shift_input.pre_df post_df = curve_shift_input.post_df @@ -223,8 +213,8 @@ def _calculate_curve_shift(curve_shift_input: CurveShiftInput) -> float: result = (mean_df[conf.y_col] / mean_df["expected_y"] - 1).clip(-1, 1) # log warning + warning_msg = None if abs(result) > conf.warning_threshold: warning_msg = f"{wtg_name} Ops Curve Shift warning: abs({conf.name}) > {conf.warning_threshold}: {result:.3f}" - result_manager.warning(warning_msg) - return result + return CurveShiftOutput(value=result, warning_msg=warning_msg)