Skip to content

Commit

Permalink
add wind farm LT distribution option (#14)
Browse files Browse the repository at this point in the history
* make ref subfolder

* order columns in interim results

* increase hrs_per_mps

* bump version

* more cols in combine_results

* Update pre_post_pp_analysis_expected_df.parquet

* increase ws_diff_ul

* add use_test_wtg_lt_distribution setting

* Update caching.py

only warn when loading a pre-existing cache file, not one freshly written in the same call

* combine check_for_ops_curve_shift warnings

* add plot_pre_post_uplift_pct

* fix check_for_ops_curve_shift warning

* rename smart_missing_data_fields

* bug fix

fix case where no northing corrections are defined

* remove uplift_relative_cp

* Update smarteole_example.ipynb

* add test_calc_windfarm_lt_dfs_raw_filt

* Update northing.py

ensure wf_df is always set the same way, even if a turbine has no northing corrections

* Update northing.py

fix format

* Update smarteole_example.ipynb
  • Loading branch information
aclerc authored Jul 29, 2024
1 parent ca6f29d commit d05d2de
Show file tree
Hide file tree
Showing 20 changed files with 557 additions and 436 deletions.
380 changes: 220 additions & 160 deletions examples/smarteole_example.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "res-wind-up"
version = "0.1.7"
version = "0.1.8"
authors = [
{ name = "Alex Clerc", email = "[email protected]" }
]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_combine_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ def test_brt_t16_pitch() -> None:
trdf = pd.read_csv(Path(__file__).parents[0] / "test_data/trdf_BRT_T16_pitch_Sep23.csv", index_col=0)
edf = pd.read_csv(Path(__file__).parents[0] / "test_data/tdf_BRT_T16_pitch.csv", index_col=0)
tdf = combine_results(trdf=trdf)
tdf = tdf[edf.columns.tolist()]
assert_frame_equal(edf, tdf)


def test_brt_t16_pitch_no_auto_choose() -> None:
    """Check ``combine_results`` with ``auto_choose_refs=False`` against the stored expected frame."""
    here = Path(__file__).parents[0]
    trdf = pd.read_csv(here / "test_data/trdf_BRT_T16_pitch_Sep23.csv", index_col=0)
    edf = pd.read_csv(here / "test_data/tdf_BRT_T16_pitch_no_auto_choose.csv", index_col=0)
    actual = combine_results(trdf=trdf, auto_choose_refs=False)
    # compare only the columns present in the expected file, in the expected file's order
    assert_frame_equal(edf, actual[edf.columns.tolist()])


Expand All @@ -86,4 +88,5 @@ def test_brt_t16_pitch_exclude_refs() -> None:
edf = pd.read_csv(Path(__file__).parents[0] / "test_data/tdf_BRT_T16_pitch_exclude_refs.csv", index_col=0)
# all but one ref excluded
tdf = combine_results(trdf=trdf, auto_choose_refs=False, exclude_refs=["BRT_T02", "BRT_T03", "BRT_T04", "BRT_T14"])
tdf = tdf[edf.columns.tolist()]
assert_frame_equal(edf, tdf)
Binary file modified tests/test_data/pre_post_pp_analysis_expected_df.parquet
Binary file not shown.
44 changes: 40 additions & 4 deletions tests/test_long_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
from pandas.testing import assert_frame_equal

from wind_up.long_term import calc_turbine_lt_dfs_raw_filt
from wind_up.long_term import calc_lt_dfs_raw_filt
from wind_up.models import WindUpConfig


Expand All @@ -12,16 +12,52 @@ def test_calc_turbine_lt_dfs_raw_filt(test_lsa_t13_config: WindUpConfig) -> None
test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet")
test_df.columns = test_df.columns.str.replace("test_", "")
test_name = "LSA_T13"
lt_wtg_df_raw, lt_wtg_df_filt = calc_turbine_lt_dfs_raw_filt(
wtg_name=test_name,
lt_wtg_df_raw, lt_wtg_df_filt = calc_lt_dfs_raw_filt(
wtg_or_wf_name=test_name,
cfg=cfg,
wtg_df=test_df,
wtg_or_wf_df=test_df,
ws_col="WindSpeedMean",
pw_col="ActivePowerMean",
one_turbine=True,
plot_cfg=None,
)

expected_raw_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_lt_wtg_df_raw.parquet")
expected_filt_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_lt_wtg_df_filt.parquet")
assert_frame_equal(lt_wtg_df_raw, expected_raw_df)
assert_frame_equal(lt_wtg_df_filt, expected_filt_df)


def test_calc_windfarm_lt_dfs_raw_filt(test_lsa_t13_config: WindUpConfig) -> None:
    """Check ``calc_lt_dfs_raw_filt`` with ``one_turbine=False`` on a fake three-turbine frame.

    The LSA_T13 test data is replicated under three turbine names to form the wind
    farm frame. The stored single-turbine expected frames are then compared after
    multiplying ``observed_hours`` and ``observed_mwh`` by three.
    """
    cfg = test_lsa_t13_config
    data_dir = Path(__file__).parents[0]
    wtg_names = ["LSA_T13", "LSA_T14", "LSA_T15"]

    test_df = pd.read_parquet(data_dir / "test_data/LSA_T13_test_df.parquet")
    test_df.columns = test_df.columns.str.replace("test_", "")

    # make a fake wf_df: one copy of the same data per turbine name,
    # each indexed by (TurbineName, original index)
    per_wtg_dfs = []
    for wtg_name in wtg_names:
        named_df = test_df.copy()
        named_df["TurbineName"] = wtg_name
        per_wtg_dfs.append(named_df.set_index(["TurbineName"], append=True).swaplevel())
    wf_df = pd.concat(per_wtg_dfs)

    # keep only the turbines present in the fake frame
    names_to_keep = set(wtg_names)
    cfg.asset.wtgs = [x for x in cfg.asset.wtgs if x.name in names_to_keep]

    lt_wtg_df_raw, lt_wtg_df_filt = calc_lt_dfs_raw_filt(
        wtg_or_wf_name=cfg.asset.name,
        cfg=cfg,
        wtg_or_wf_df=wf_df,
        ws_col="WindSpeedMean",
        pw_col="ActivePowerMean",
        one_turbine=False,
        plot_cfg=None,
    )

    # raw frame is checked first, then the filtered frame
    for actual_df, expected_file in (
        (lt_wtg_df_raw, "test_data/LSA_T13_lt_wtg_df_raw.parquet"),
        (lt_wtg_df_filt, "test_data/LSA_T13_lt_wtg_df_filt.parquet"),
    ):
        expected_df = pd.read_parquet(data_dir / expected_file)
        expected_df["observed_hours"] *= len(wtg_names)
        expected_df["observed_mwh"] *= len(wtg_names)
        assert_frame_equal(actual_df, expected_df)
51 changes: 13 additions & 38 deletions tests/test_pp_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,43 +36,18 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
test_df=test_df,
)

# minor changes to make actual_df compatible with old expected_df
expected_df["hours_for_mwh_calc"] = expected_df["hours_per_year"]
expected_df["hours_per_year"] = actual_df["hours_per_year"]
cols_with_new_calc = ["uplift_kw_se", "uplift_p5_kw", "uplift_p95_kw"]
expected_df[cols_with_new_calc] = actual_df[cols_with_new_calc]
new_cols = [
"pre_valid",
"post_valid",
"hours_pre_raw",
"hours_post_raw",
"is_invalid_bin",
"pw_at_mid_expected",
"pw_sem_at_mid_expected",
"relative_cp_baseline",
"relative_cp_post",
"relative_cp_sem_at_mid_expected",
"relative_cp_sem_at_mid_post",
"uplift_relative_cp",
"uplift_relative_cp_se",
"uplift_relative_cp_p5",
"uplift_relative_cp_p95",
]
expected_df[new_cols] = actual_df[new_cols]
expected_df = expected_df[actual_df.columns]

assert_frame_equal(actual_df, expected_df)
assert pp_results["pp_valid_hours"] == pytest.approx(10748.5)
assert pp_results["pp_valid_hours_pre"] == pytest.approx(5807.333333333333)
assert pp_results["pp_valid_hours"] == pytest.approx(10745.83333333333)
assert pp_results["pp_valid_hours_pre"] == pytest.approx(5804.666666666666)
assert pp_results["pp_valid_hours_post"] == pytest.approx(4941.166666666667)
assert pp_results["pp_invalid_bin_count"] == 3
assert pp_results["pp_data_coverage"] == pytest.approx(0.6793388952092024)
assert pp_results["reversal_error"] == pytest.approx(-0.008786551768533796)
assert pp_results["uplift_noadj_frc"] == pytest.approx(0.04523448345231426)
assert pp_results["poweronly_uplift_frc"] == pytest.approx(0.04560411838169785)
assert pp_results["reversed_uplift_frc"] == pytest.approx(0.03681756661316406)
assert pp_results["uplift_frc"] == pytest.approx(0.040841207568047364)
assert pp_results["missing_bins_unc_scale_factor"] == pytest.approx(1.0000000006930523)
assert pp_results["t_value_one_sigma"] == pytest.approx(1.0000168636907854)
assert pp_results["unc_one_sigma_lowerbound_frc"] == pytest.approx(0.004393275884266898)
assert pp_results["unc_one_sigma_frc"] == pytest.approx(0.004393275884266898)
assert pp_results["pp_invalid_bin_count"] == 4
assert pp_results["pp_data_coverage"] == pytest.approx(0.679170353516201)
assert pp_results["reversal_error"] == pytest.approx(-0.008804504211352544)
assert pp_results["uplift_noadj_frc"] == pytest.approx(0.04523639558619659)
assert pp_results["poweronly_uplift_frc"] == pytest.approx(0.045608171004022265)
assert pp_results["reversed_uplift_frc"] == pytest.approx(0.03680366679266972)
assert pp_results["uplift_frc"] == pytest.approx(0.04083414348052032)
assert pp_results["missing_bins_unc_scale_factor"] == pytest.approx(1.0000004657300203)
assert pp_results["t_value_one_sigma"] == pytest.approx(1.0000168659661646)
assert pp_results["unc_one_sigma_lowerbound_frc"] == pytest.approx(0.004402252105676272)
assert pp_results["unc_one_sigma_frc"] == pytest.approx(0.004402252105676272)
5 changes: 4 additions & 1 deletion wind_up/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ def wrapped_f(*a: Any, **kw: Any) -> pd.DataFrame: # noqa
def with_pickle_cache(fp: Path, *, use_cache: bool = True) -> Callable:
def wrap(func: Callable[..., Any]) -> Callable[..., Any]:
def wrapped_f(*a: Any, **kw: Any) -> Any: # noqa
fresh_cache = False
if not Path(fp).is_file() or not use_cache or Path(fp).stat().st_size == 0:
with Path.open(fp, "wb") as f:
pickle.dump(func(*a, **kw), f)
with Path.open(fp, "rb") as f:
fresh_cache = True
if not fresh_cache:
result_manager.warning(f"loading cached pickle {fp}")
with Path.open(fp, "rb") as f:
return pickle.load(f)

return wrapped_f
Expand Down
2 changes: 2 additions & 0 deletions wind_up/combine_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@ def calc_tdf(trdf: pd.DataFrame, ref_list: list[str], weight_col: str = "unc_wei
aggfunc=lambda x: (x * trdf.loc[x.index, weight_col]).sum() / trdf.loc[x.index, weight_col].sum(),
),
ref_count=pd.NamedAgg(column="uplift_frc", aggfunc=len),
ref_list=pd.NamedAgg(column="ref", aggfunc=lambda x: ", ".join(sorted(x))),
is_ref=pd.NamedAgg(column="test_wtg", aggfunc=lambda x: x.isin(ref_list).any()),
)
tdf["sigma_test"] = (tdf["sigma_uncorr"] + tdf["sigma_corr"]) / 2
tdf = tdf.sort_values(by=["ref_count", "test_wtg"], ascending=[False, True])
tdf = tdf.reset_index()
sigma_ref = calc_sigma_ref(tdf, ref_list)
tdf["sigma_ref"] = sigma_ref
tdf["sigma"] = tdf["sigma_test"].clip(lower=sigma_ref)
tdf["p95_uplift"] = tdf["p50_uplift"] + norm.ppf(0.05) * tdf["sigma"]
tdf["p5_uplift"] = tdf["p50_uplift"] + norm.ppf(0.95) * tdf["sigma"]
Expand Down
132 changes: 41 additions & 91 deletions wind_up/long_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ def calc_lt_df(
ws_bin_edges = np.arange(0, df_for_lt[ws_col].max() + ws_bin_width, ws_bin_width)

rows_per_hour = 3600 / timebase_s
df_for_groupby = df_for_lt.reset_index()
lt_df = (
df_for_lt.dropna(subset=[ws_col, pw_col])
.groupby(by=pd.cut(df_for_lt[ws_col], bins=ws_bin_edges, retbins=False), observed=False)
df_for_groupby.dropna(subset=[ws_col, pw_col])
.groupby(by=pd.cut(df_for_groupby[ws_col], bins=ws_bin_edges, retbins=False), observed=False)
.agg(
ws_mean=pd.NamedAgg(column=ws_col, aggfunc=lambda x: x.mean()),
observed_hours=pd.NamedAgg(column=ws_col, aggfunc=lambda x: len(x) / rows_per_hour),
Expand Down Expand Up @@ -88,139 +89,88 @@ def calc_lt_df(
return lt_df


def calc_turbine_lt_df(
wtg_name: str,
def filter_and_calc_lt_df(
wtg_or_wf_name: str,
cfg: WindUpConfig,
wtg_df: pd.DataFrame,
wtg_or_wf_df: pd.DataFrame,
*,
ws_col: str,
pw_col: str,
title_end: str = "",
one_turbine: bool,
plot_cfg: PlotConfig | None = None,
) -> pd.DataFrame:
workings_df = wtg_df.copy()
workings_df = wtg_or_wf_df.copy()
if not isinstance(workings_df.index, pd.DatetimeIndex):
msg = f"wtg_df must have a DatetimeIndex, got {type(workings_df.index)}"
raise TypeError(msg)
if "TimeStamp_StartFormat" in workings_df.index.names:
workings_df = workings_df.reset_index().set_index("TimeStamp_StartFormat", drop=True)
else:
msg = (
f"workings_df must have a DatetimeIndex or index level called TimeStamp_StartFormat. "
f"{workings_df.index.names=}"
)
raise ValueError(msg)

ok_for_lt = (workings_df.index >= cfg.lt_first_dt_utc_start) & (workings_df.index <= cfg.lt_last_dt_utc_start)

lt_df = calc_lt_df(
df_for_lt=workings_df[ok_for_lt],
num_turbines=1,
num_turbines=1 if one_turbine else len(cfg.asset.wtgs),
years_for_lt_distribution=cfg.years_for_lt_distribution,
ws_col=ws_col,
ws_bin_width=cfg.ws_bin_width,
pw_col=pw_col,
timebase_s=cfg.timebase_s,
)
if plot_cfg is not None:
plot_lt_ws(lt_df=lt_df, turbine_or_wf_name=wtg_name, title_end=title_end, plot_cfg=plot_cfg, one_turbine=True)

return lt_df


def calc_turbine_lt_dfs_raw_filt(
wtg_name: str,
cfg: WindUpConfig,
wtg_df: pd.DataFrame,
*,
ws_col: str,
pw_col: str,
plot_cfg: PlotConfig | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
lt_wf_df_raw = calc_turbine_lt_df(
wtg_name=wtg_name,
cfg=cfg,
wtg_df=wtg_df,
ws_col=RAW_WINDSPEED_COL,
pw_col=RAW_POWER_COL,
title_end="before filter",
plot_cfg=plot_cfg,
)
lt_wf_df_filt = calc_turbine_lt_df(
wtg_name=wtg_name,
cfg=cfg,
wtg_df=wtg_df,
ws_col=ws_col,
pw_col=pw_col,
title_end="after filter",
plot_cfg=plot_cfg,
)
if plot_cfg is not None:
plot_lt_ws_raw_filt(
lt_df_raw=lt_wf_df_raw,
lt_df_filt=lt_wf_df_filt,
turbine_or_wf_name=wtg_name,
plot_lt_ws(
lt_df=lt_df,
turbine_or_wf_name=wtg_or_wf_name,
title_end=title_end,
plot_cfg=plot_cfg,
one_turbine=True,
one_turbine=one_turbine,
)

return lt_wf_df_raw, lt_wf_df_filt


def calc_windfarm_lt_df(
cfg: WindUpConfig,
wf_df: pd.DataFrame,
*,
ws_col: str,
pw_col: str,
title_end: str = "",
plot_cfg: PlotConfig | None = None,
) -> pd.DataFrame:
workings_df = wf_df.copy()
if len(workings_df.index.levels) == 2: # noqa PLR2004
workings_df.index = workings_df.index.droplevel("TurbineName")

ok_for_lt = (workings_df.index >= cfg.lt_first_dt_utc_start) & (workings_df.index <= cfg.lt_last_dt_utc_start)

lt_df = calc_lt_df(
df_for_lt=workings_df[ok_for_lt],
num_turbines=len(cfg.asset.wtgs),
years_for_lt_distribution=cfg.years_for_lt_distribution,
ws_col=ws_col,
ws_bin_width=cfg.ws_bin_width,
pw_col=pw_col,
timebase_s=cfg.timebase_s,
)

if plot_cfg is not None:
plot_lt_ws(lt_df=lt_df, turbine_or_wf_name=cfg.asset.name, title_end=title_end, plot_cfg=plot_cfg)

return lt_df


def calc_windfarm_lt_dfs_raw_filt(
def calc_lt_dfs_raw_filt(
wtg_or_wf_name: str,
cfg: WindUpConfig,
wf_df_raw: pd.DataFrame,
wf_df_filt: pd.DataFrame,
wtg_or_wf_df: pd.DataFrame,
*,
ws_col: str,
pw_col: str,
one_turbine: bool,
plot_cfg: PlotConfig | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
lt_wf_df_raw = calc_windfarm_lt_df(
lt_df_raw = filter_and_calc_lt_df(
wtg_or_wf_name=wtg_or_wf_name,
cfg=cfg,
wf_df=wf_df_raw,
ws_col=ws_col,
pw_col=pw_col,
wtg_or_wf_df=wtg_or_wf_df,
ws_col=RAW_WINDSPEED_COL,
pw_col=RAW_POWER_COL,
title_end="before filter",
one_turbine=one_turbine,
plot_cfg=plot_cfg,
)
lt_wf_df_filt = calc_windfarm_lt_df(
lt_df_filt = filter_and_calc_lt_df(
wtg_or_wf_name=wtg_or_wf_name,
cfg=cfg,
wf_df=wf_df_filt,
wtg_or_wf_df=wtg_or_wf_df,
ws_col=ws_col,
pw_col=pw_col,
title_end="after filter",
one_turbine=one_turbine,
plot_cfg=plot_cfg,
)
if plot_cfg is not None:
plot_lt_ws_raw_filt(
lt_df_raw=lt_wf_df_raw,
lt_df_filt=lt_wf_df_filt,
turbine_or_wf_name=cfg.asset.name,
lt_df_raw=lt_df_raw,
lt_df_filt=lt_df_filt,
wtg_or_wf_name=wtg_or_wf_name,
plot_cfg=plot_cfg,
one_turbine=one_turbine,
)

return lt_wf_df_raw, lt_wf_df_filt
return lt_df_raw, lt_df_filt
Loading

0 comments on commit d05d2de

Please sign in to comment.